Mercurial > repos > fgiacomoni > bank_inhouse
view lib/bih.pm @ 0:be582bcd6585 draft
Master branch Updating - - Fxx
author | fgiacomoni |
---|---|
date | Thu, 04 Oct 2018 10:37:14 -0400 |
parents | |
children | 52798007c6b2 |
line wrap: on
line source
package lib::bih ; use strict; use warnings ; use Exporter ; use Carp ; use Math::BigFloat; use LWP::Simple; use LWP::UserAgent; use URI::URL; use SOAP::Lite; use Encode; use HTML::Template ; #use Net::SSL ; use Data::Dumper ; #use REST::Client; use JSON; use vars qw($VERSION @ISA @EXPORT %EXPORT_TAGS); our $VERSION = "1.0"; our @ISA = qw(Exporter); our @EXPORT = qw( db_pforest_get_clean_range map_pfjson_bankobject prepare_multi_masses_query ); our %EXPORT_TAGS = ( ALL => [qw( db_pforest_get_clean_range map_pfjson_bankobject prepare_multi_masses_query )] ); =head1 NAME My::Module - An example module =head1 SYNOPSIS use My::Module; my $object = My::Module->new(); print $object->as_string; =head1 DESCRIPTION This module does not really exist, it was made for the sole purpose of demonstrating how POD works. =head1 METHODS Methods are : =head2 METHOD new ## Description : new ## Input : $self ## Ouput : bless $self ; ## Usage : new() ; =cut sub new { ## Variables my $self={}; bless($self) ; return $self ; } ### END of SUB =head2 METHOD check_interval ## Description : checks that the value is in the interval ## Input : $value, $min, $max ## Output : $message ## Usage : $message= check_interval($value, $min, $max) ; =cut ## START of SUB sub check_interval { ## Retrieve Values my $self = shift ; my ( $value, $min, $max ) = @_ ; my ( $message ) = undef ; if ( $min !~ m/^-?\d+\.?\d*$/ ) { $message="the minimum '".$min."' isn't a valid number!"; } elsif ( $max !~ m/^-?\d+\.?\d*$/ ) { $message="the maximum '".$max."' isn't a valid number!"; } elsif ( $value !~ m/^-?\d+\.?\d*$/ ) { $message="'".$value."' isn't a valid number!"; } elsif ( $value < $min ) { $message="'".$value."' is below the minimum!"; } elsif ( $value > $max ) { $message="'".$value."' is greater than the maximum!"; } else { $message="OK" ; } return($message) ; } ## END of SUB =head2 METHOD format_manual_list_values ## Description : extract a list of values and built identifiers ## Input : $value, $sep ## Output : \@values, \@ids ## Usage : ($masses, $ids)= format_manual_list_values($mass, $sep) ; =cut ## START of SUB sub format_manual_list_values { ## Retrieve Values my $self = shift ; my ( $value, $sep ) = @_ ; my ( @values, @ids ) = ( (), () ) ; if ( ( defined $value ) and ( $value ne "" ) and ( defined $sep ) ) { @values = split($sep, $value); my $nb = 1+int(log($#values+1)/log(10)); my $sf = '%0'.$nb.'s'; for (my $i=1 ; $i<=$#values+1 ; $i++){ my $id = sprintf($sf, $i) ; push (@ids,"value_".$id ); } } else { croak "No value list found \n" ; } return(\@values, \@ids) ; } ## END of SUB =head2 METHOD parse_bank_interest ## Description : parse csv object and return a two-dimensional array in a hash by grouping information according to the interest value as key. ## Input : $csv, \$file, $col_interest ## Output : \%bank_interest, \$head ## Usage : my ( $bank_interest, $head ) = parse_bank_interest ( $csv, $file, $col_interest ) ; =cut ## START of SUB sub parse_bank_interest { ## Retrieve Values my $self = shift ; my ( $csv, $file, $col_interest ) = @_ ; my $bank_interest = () ; my $oBih = new() ; open my $fh, "<:encoding(utf8)", $$file or die "Can't open csv file $$file: $!"; my $head = $csv->getline( $fh ); my $nb_line = 1 ; ## for error messages while ( my $row = $csv->getline( $fh ) ) { $nb_line++ ; if ($#$head != $#$row) { croak "Not the same number of columns over the file of the interest bank! See the line: $nb_line in your input bank file!\n" ; } ## it would be more general to do the following masse check out this function my ($MZmessage) = $oBih->check_interval($$row[$col_interest], 0, 10000) ; if ($MZmessage ne 'OK') { $col_interest++; croak "There is at least one row (See the line : $nb_line) where in the column $col_interest : $MZmessage\n" ; } #/!\ col_interest++ to print to user a not-table (@) value. push (@{$bank_interest->{$$row[$col_interest]}}, $row ) ; } # print Dumper $bank_interest; exit; $csv->eof or $csv->error_diag(); close $fh; return($bank_interest, $head) ; } ## END of SUB =head2 METHOD mz_delta_conversion ## Description : returns the minimum and maximum mass according to the delta ## Input : \$mass, \$delta_type, \$mz_delta ## Output : \$min, \$max ## Usage : ($min, $max)= mz_delta_conversion($mass, $delta_type, $mz_delta) ; =cut ## START of SUB sub mz_delta_conversion { ## Retrieve Values my $self = shift ; my ( $mass, $delta_type, $mz_delta ) = @_ ; my ( $min, $max ) = ( undef, undef ) ; if ($$delta_type eq 'ppm'){ $min = $$mass - ($$mz_delta * 10**-6 * $$mass); $max = $$mass + ($$mz_delta * 10**-6 * $$mass) + 0.0000000001; ## it's to included the maximum value in the search } elsif ($$delta_type eq 'Da'){ $min = $$mass - $$mz_delta; $max = $$mass + $$mz_delta + 0.0000000001; ## it's to included the maximum value in the search } else { croak "The masses delta type '$$delta_type' isn't a valid type !\n" ; } return(\$min, \$max) ; } ## END of SUB =head2 METHOD dichotomi_search ## Description : returns the index of the position or the interval value ## does not work if there are duplicates in the table ## Input : \@tab, \$search ## Output : \$index ## Usage : ($index)= dichotomi_search($mass, $sep) ; =cut ## START of SUB sub dichotomi_search { ## Retrieve Values my $self = shift ; my ( $tab, $search ) = @_ ; my ($sup, $inf, $demi) = (scalar(@{$tab})-1, 0, 0); while(1) { $demi = int(($sup + $inf)/2); if($sup < $inf) { if($inf==0){ $demi=-1; } elsif($sup==scalar(@{$tab})-1){ $demi=scalar(@{$tab}); } ## to distinguish items off limits last; } elsif ( ($$search == $$tab[$demi]) ) { last; } elsif ( ($$search > $$tab[$demi-1]) && ($$search < @$tab[$demi]) ) { last; } elsif($$search < $$tab[$demi]) { $sup = $demi - 1; next; } else { $inf = $demi + 1; next; } } return(\$demi) ; } ## END of SUB =head2 METHOD extract_sub_mz_lists ## Description : extract a couples of sublist from a long mz list (more than $HMDB_LIMITS) ## Input : $HMDB_LIMITS, $masses ## Output : $sublists ## Usage : my ( $sublists ) = extract_sub_mz_lists( $HMDB_LIMITS, $masses ) ; =cut ## START of SUB sub extract_sub_mz_lists { ## Retrieve Values my $self = shift ; my ( $masses, $HMDB_LIMITS ) = @_ ; my ( @sublists, @sublist ) = ( (), () ) ; my $nb_mz = 0 ; my $nb_total_mzs = scalar(@{$masses}) ; for ( my $current_pos = 0 ; $current_pos < $nb_total_mzs ; $current_pos++ ) { if ( $nb_mz < $HMDB_LIMITS ) { if ( $masses->[$current_pos] ) { push (@sublist, $masses->[$current_pos]) ; $nb_mz++ ; } # build sub list } elsif ( $nb_mz == $HMDB_LIMITS ) { my @tmp = @sublist ; push (@sublists, \@tmp) ; @sublist = () ; $nb_mz = 0 ; $current_pos-- ; } if ($current_pos == $nb_total_mzs-1) { my @tmp = @sublist ; push (@sublists, \@tmp) ; } } return(\@sublists) ; } ## END of SUB =head2 METHOD prepare_multi_masses_query ## Description : permet de generer une liste de masses au format d'interrogation de hmdb ## Input : $masses ## Output : $hmdb_masses ## Usage : my ( $hmdb_masses ) = prepare_multi_masses_query( $masses ) ; =cut ## START of SUB sub prepare_multi_masses_query { ## Retrieve Values my $self = shift ; my ( $masses ) = @_ ; my $hmdb_masses = undef ; my $sep = '%0D%0A' ; ## retour chariot encode my ($nb_masses, $i) = (0, 0) ; if ( defined $masses ) { my @masses = @{$masses} ; my $nb_masses = scalar ( @masses ) ; if ( $nb_masses == 0 ) { croak "Your mass list is empty \n" ; } elsif ( $nb_masses >= 150 ) { croak "Your mass list is too long : HMDB allows maximum 150 query masses per request \n" ; } ## Del it --- temporary patch foreach my $mass (@masses) { if ($i < $nb_masses) { $hmdb_masses .= $mass.$sep ; } elsif ( $i == $nb_masses ) { $hmdb_masses .= $mass ; } else { last ; } $i ++ ; } } else { croak "No mass list found \n" ; } return($hmdb_masses, $nb_masses) ; } ## END of SUB =head2 METHOD get_matches_from_hmdb ## Description : permet de requeter sur hmdb avec une masse, un delta de masse sur la banque de metabolites hmdb ## Input : $mass, $delta, $mode ## Output : $results ## Usage : my ( $results ) = get_matches_from_hmdb( $mass, $delta, $mode ) ; =cut ## START of SUB sub get_matches_from_hmdb { ## Retrieve Values my $self = shift ; my ( $mass, $delta, $mode ) = @_ ; my @pages = () ; my $page = undef ; if ( (defined $mass) and (defined $delta) and (defined $mode) ) { my $url = 'http://www.hmdb.ca/spectra/spectra/ms/search?utf8=TRUE&query_masses='.$mass.'&tolerance='.$delta.'&mode='.$mode.'&commit=Search' ; # print $url."\n" ; my $oUrl = url($url); $page = get($oUrl); # print $page."\n" ; ## manage output if ( ( !defined $page ) or ( $page eq "" ) ) { die "Problem to connect to HMDB, page empty or undefined.\n" ; } else { @pages = split(/\n/, $page); } } return(\@pages) ; } ## END of SUB =head2 METHOD get_matches_from_hmdb_ua ## Description : permet de requeter via un user agent sur hmdb avec une masse, un delta de masse sur la banque de metabolites hmdb ## Input : $mass, $delta, $mode ## Output : $results ## Usage : my ( $results ) = get_matches_from_hmdb( $mass, $delta, $mode ) ; =cut ## START of SUB sub get_matches_from_hmdb_ua { ## Retrieve Values my $self = shift ; my ( $masses, $delta, $mode ) = @_ ; my @page = () ; my $ua = new LWP::UserAgent; $ua->agent("Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36"); my $req = HTTP::Request->new( POST => 'http://specdb.wishartlab.com/ms/search.csv'); $req->content_type('application/x-www-form-urlencoded'); $req->content('utf8=TRUE&mode='.$mode.'&query_masses='.$masses.'&tolerance='.$delta.'&database=HMDB&commit=Download Results As CSV'); my $res = $ua->request($req); # print $res->as_string; @page = split ( /\n/, $res->decoded_content ) ; return (\@page) ; } ## END of SUB =head2 METHOD parse_hmdb_csv_results ## Description : parse the csv results and get data ## Input : $csv ## Output : $results ## Usage : my ( $results ) = parse_hmdb_csv_results( $csv ) ; =cut ## START of SUB sub parse_hmdb_csv_results { ## Retrieve Values my $self = shift ; my ( $csv, $masses ) = @_ ; my $test = 0 ; my ($query_mass,$compound_id,$formula,$compound_mass,$adduct,$adduct_type,$adduct_mass,$delta) = (0, undef, undef, undef, undef, undef, undef, undef) ; my %result_by_entry = () ; my %features = () ; foreach my $line (@{$csv}) { if ($line !~ /query_mass,compound_id,formula,compound_mass,adduct,adduct_type,adduct_mass,delta/) { my @entry = split(/,/, $line) ; if ( !exists $result_by_entry{$entry[0]} ) { $result_by_entry{$entry[0]} = [] ; } $features{ENTRY_ENTRY_ID} = $entry[1] ; $features{ENTRY_FORMULA} = $entry[2] ; $features{ENTRY_CPD_MZ} = $entry[3] ; $features{ENTRY_ADDUCT} = $entry[4] ; $features{ENTRY_ADDUCT_TYPE} = $entry[5] ; $features{ENTRY_ADDUCT_MZ} = $entry[6] ; $features{ENTRY_DELTA} = $entry[7] ; my %temp = %features ; push (@{$result_by_entry{$entry[0]} }, \%temp) ; } else { next ; } } ## end foreach ## manage per query_mzs (keep query masses order by array) my @results = () ; foreach (@{$masses}) { if ($result_by_entry{$_}) { push (@results, $result_by_entry{$_}) ; } else {push (@results, [] ) ;} ; } return(\@results) ; } ## END of SUB =head2 METHOD parse_hmdb_page_results ## Description : permet de parser le contenu des resultats hmdb ## Input : $page ## Output : $results ## Usage : my ( $results ) = parse_hmdb_page_result( $pages ) ; =cut ## START of SUB sub parse_hmdb_page_results { ## Retrieve Values my $self = shift ; my ( $page ) = @_ ; my @results = () ; my ($catch_table, $catch_name) = (0, 0) ; my ($name, $adduct, $adduct_mw, $cpd_mw, $delta) = (undef, undef, undef, undef, undef) ; if ( defined $page ) { my @page = @{$page} ; my $ID = undef ; my @result_by_mz = () ; my %result_by_entry = () ; foreach my $line (@page) { #Section de la page contenant les resultat if( $line =~/<table>/ ) { $catch_table = 1 ; } ## Si il existe un resultat : if($catch_table == 1) { #Id de la molecule, et creation du lien if( $line =~ /<a href=\"\/metabolites\/(\w+)\" (.*)>/ ) { $ID = $1 ; $catch_name = 0 ; next ; } #Nom de la molecule ONLY!! if ( $catch_name == 0 ) { if( $line =~ /<td>(.+)<\/td>/ ) { if ( !defined $name ) { $name = $1 ; $result_by_entry{'ENTRY_ENTRY_ID'} = $ID ; $result_by_entry{'ENTRY_NAME'} = $name ; next ; } if ( !defined $adduct ) { $adduct = $1 ; $result_by_entry{'ENTRY_ADDUCT'} = $adduct ; next ; } if ( !defined $adduct_mw ) { $adduct_mw = $1 ; $result_by_entry{'ENTRY_ADDUCT_MZ'} = $adduct_mw ; next ; } if ( !defined $cpd_mw ) { $cpd_mw = $1 ; $result_by_entry{'ENTRY_CPD_MZ'} = $cpd_mw ; next ; } if ( !defined $delta ) { $delta = $1 ; $result_by_entry{'ENTRY_DELTA'} = $delta ; $catch_name = 1 ; my %tmp = %result_by_entry ; push (@result_by_mz, \%tmp) ; %result_by_entry = () ; ( $name, $cpd_mw, $delta, $adduct, $adduct_mw ) = ( undef, undef, undef, undef, undef ) ; next ; } } } } #Fin de la section contenant les resultats if( $line =~ /<\/table>/ ) { $catch_table = 0 ; my @Tmp = @result_by_mz ; push(@results, \@Tmp) ; @result_by_mz = () ; } } } return(\@results) ; } ## END of SUB =head2 METHOD set_html_tbody_object ## Description : initializes and build the tbody object (perl array) need to html template ## Input : $nb_pages, $nb_items_per_page ## Output : $tbody_object ## Usage : my ( $tbody_object ) = set_html_tbody_object($nb_pages, $nb_items_per_page) ; =cut ## START of SUB sub set_html_tbody_object { my $self = shift ; my ( $nb_pages, $nb_items_per_page ) = @_ ; my ( @tbody_object ) = ( ) ; for ( my $i = 1 ; $i <= $nb_pages ; $i++ ) { my %pages = ( # tbody feature PAGE_NB => $i, MASSES => [], ## end MASSES ) ; ## end TBODY N push (@tbody_object, \%pages) ; } return(\@tbody_object) ; } ## END of SUB =head2 METHOD add_mz_to_tbody_object ## Description : initializes and build the mz object (perl array) need to html template ## Input : $tbody_object, $nb_items_per_page, $mz_list ## Output : $tbody_object ## Usage : my ( $tbody_object ) = add_mz_to_tbody_object( $tbody_object, $nb_items_per_page, $mz_list ) ; =cut ## START of SUB sub add_mz_to_tbody_object { my $self = shift ; my ( $tbody_object, $nb_items_per_page, $mz_list, $ids_list ) = @_ ; my ( $current_page, $mz_index ) = ( 0, 0 ) ; foreach my $page ( @{$tbody_object} ) { my @colors = ('white', 'green') ; my ( $current_index, , $icolor ) = ( 0, 0 ) ; for ( my $i = 1 ; $i <= $nb_items_per_page ; $i++ ) { # if ( $current_index > $nb_items_per_page ) { ## manage exact mz per html page $current_index = 0 ; last ; ## } else { $current_index++ ; if ( $icolor > 1 ) { $icolor = 0 ; } if ( exists $mz_list->[$mz_index] ) { my %mz = ( # mass feature MASSES_ID_QUERY => $ids_list->[$mz_index], MASSES_MZ_QUERY => $mz_list->[$mz_index], MZ_COLOR => $colors[$icolor], MASSES_NB => $mz_index+1, ENTRIES => [] , ) ; push ( @{ $tbody_object->[$current_page]{MASSES} }, \%mz ) ; # Html attr for mass $icolor++ ; } } $mz_index++ ; } ## foreach mz $current_page++ ; } return($tbody_object) ; } ## END of SUB =head2 METHOD add_entries_to_tbody_object ## Description : initializes and build the mz object (perl array) need to html template ## Input : $tbody_object, $nb_items_per_page, $mz_list, $entries ## Output : $tbody_object ## Usage : my ( $tbody_object ) = add_entries_to_tbody_object( $tbody_object, $nb_items_per_page, $mz_list, $entries ) ; =cut ## START of SUB sub add_entries_to_tbody_object { ## Retrieve Values my $self = shift ; my ( $tbody_object, $nb_items_per_page, $mz_list, $entries ) = @_ ; my $index_page = 0 ; my $index_mz_continous = 0 ; foreach my $page (@{$tbody_object}) { my $index_mz = 0 ; foreach my $mz (@{ $tbody_object->[$index_page]{MASSES} }) { my $index_entry = 0 ; my @anti_redondant = ('N/A') ; my $check_rebond = 0 ; foreach my $entry (@{ $entries->[$index_mz_continous] }) { ## dispo anti doublons des entries foreach my $rebond (@anti_redondant) { if ( $rebond eq $entries->[$index_mz_continous][$index_entry]{ENTRY_ENTRY_ID} ) { $check_rebond = 1 ; last ; } } if ( $check_rebond == 0 ) { push ( @anti_redondant, $entries->[$index_mz_continous][$index_entry]{ENTRY_ENTRY_ID} ) ; my %entry = ( ENTRY_COLOR => $tbody_object->[$index_page]{MASSES}[$index_mz]{MZ_COLOR}, ENTRY_ENTRY_ID => $entries->[$index_mz_continous][$index_entry]{ENTRY_ENTRY_ID}, ENTRY_ENTRY_ID2 => $entries->[$index_mz_continous][$index_entry]{ENTRY_ENTRY_ID}, ENTRY_FORMULA => $entries->[$index_mz_continous][$index_entry]{ENTRY_FORMULA}, ENTRY_CPD_MZ => $entries->[$index_mz_continous][$index_entry]{ENTRY_CPD_MZ}, ENTRY_ADDUCT => $entries->[$index_mz_continous][$index_entry]{ENTRY_ADDUCT}, ENTRY_ADDUCT_TYPE => $entries->[$index_mz_continous][$index_entry]{ENTRY_ADDUCT_TYPE}, ENTRY_ADDUCT_MZ => $entries->[$index_mz_continous][$index_entry]{ENTRY_ADDUCT_MZ}, ENTRY_DELTA => $entries->[$index_mz_continous][$index_entry]{ENTRY_DELTA}, ) ; push ( @{ $tbody_object->[$index_page]{MASSES}[$index_mz]{ENTRIES} }, \%entry) ; } $check_rebond = 0 ; ## reinit double control $index_entry++ ; } $index_mz ++ ; $index_mz_continous ++ ; } $index_page++ ; } return($tbody_object) ; } ## END of SUB =head2 METHOD write_html_skel ## Description : prepare and write the html output file ## Input : $html_file_name, $html_object, $html_template ## Output : $html_file_name ## Usage : my ( $html_file_name ) = write_html_skel( $html_file_name, $html_object ) ; =cut ## START of SUB sub write_html_skel { ## Retrieve Values my $self = shift ; my ( $html_file_name, $html_object, $pages , $search_condition, $html_template, $js_path, $css_path ) = @_ ; my $html_file = $$html_file_name ; if ( defined $html_file ) { open ( HTML, ">$html_file" ) or die "Can't create the output file $html_file " ; if (-e $html_template) { my $ohtml = HTML::Template->new(filename => $html_template); $ohtml->param( JS_GALAXY_PATH => $js_path, CSS_GALAXY_PATH => $css_path ) ; $ohtml->param( CONDITIONS => $search_condition ) ; $ohtml->param( PAGES_NB => $pages ) ; $ohtml->param( PAGES => $html_object ) ; print HTML $ohtml->output ; } else { croak "Can't fill any html output : No template available ($html_template)\n" ; } close (HTML) ; } else { croak "No output file name available to write HTML file\n" ; } return(\$html_file) ; } ## END of SUB =head2 METHOD set_lm_matrix_object ## Description : build the bih_row under its ref form ## Input : $header, $init_mzs, $col_mzdb, $results, $rts, $col_rtdb, $bank_head, $sep ## Output : $hmdb_matrix ## Usage : my ( $hmdb_matrix ) = set_lm_matrix_object( $header, $init_mzs, $col_mzdb, $results, $rts, $col_rtdb, $bank_head, $sep ) ; =cut ## START of SUB sub set_lm_matrix_object { ## Retrieve Values my $self = shift ; my ( $header, $init_mzs, $col_mzdb, $results, $rts, $col_rtdb, $bank_head, $sep ) = @_ ; my @bih_matrix = () ; if ( defined $header ) { my @headers = () ; push @headers, $header ; push @bih_matrix, \@headers ; } my $index_mz = 0 ; $col_mzdb -= 1; $col_rtdb -= 1; ## conversion in array number foreach my $mz ( @{$init_mzs} ) { my $index_entries = 0 ; my @clusters = () ; my $cluster_col = undef ; foreach my $entry (@{ $results->[$index_mz] }) { my $format_float = Math::BigFloat->new($entry->[$col_mzdb]); ## requires "use Math::BigFloat;" my $delta_mz = abs( $format_float-$mz ); ## management problem on small float # manage final pipe if ($index_entries == 0){ $cluster_col .= $delta_mz.$sep.$entry->[$col_mzdb].$sep ; } ## Managing multiple results transition pipes "|" else{ $cluster_col .= '|'.$delta_mz.$sep.$entry->[$col_mzdb].$sep ; } if ( ( defined $rts ) and ( $rts ne "" ) ) { my $rt = $rts->[$index_mz] ; my $format_float_rt = Math::BigFloat->new($entry->[$col_rtdb]); ## requires "use Math::BigFloat;" my $delta_rt = abs( $format_float_rt-$rt ); ## management problem on small float $cluster_col .= $delta_rt.$sep.$entry->[$col_rtdb].$sep; } for (my $i=0; $i<=$#$entry; $i++){ if($i == $#$entry){ $cluster_col .= $entry->[$i]; } ## Managing multiple results transition "#" else { $cluster_col .= $entry->[$i].$sep; } } $index_entries++ ; } if ( !defined $cluster_col ) { $cluster_col = 'No_result_found_in_bank_inhouse' ; } ## $cluster_col like METLIN data display model but the "::" have been modified (#) for ease of Excel reading ## entry1=VAR1::VAR2::VAR3::VAR4|entry2=VAR1::VAR2::VAR3::VAR4|... push (@clusters, $cluster_col) ; push (@bih_matrix, \@clusters) ; $index_mz++ ; } return(\@bih_matrix) ; } ## END of SUB =head2 METHOD add_lm_matrix_to_input_matrix ## Description : build a full matrix (input + lm column) ## Input : $input_matrix_object, $lm_matrix_object, $nb_header ## Output : $output_matrix_object ## Usage : my ( $output_matrix_object ) = add_lm_matrix_to_input_matrix( $input_matrix_object, $lm_matrix_object, $nb_header ) ; =cut ## START of SUB sub add_lm_matrix_to_input_matrix { ## Retrieve Values my $self = shift ; my ( $input_matrix_object, $lm_matrix_object, $nb_header ) = @_ ; my @output_matrix_object = () ; my $index_row = 0 ; my $line = 0 ; foreach my $row ( @{$input_matrix_object} ) { my @init_row = @{$row} ; $line++; if ( ( defined $nb_header ) and ( $line <= $nb_header) ) { push (@output_matrix_object, \@init_row) ; next ; } if ( $lm_matrix_object->[$index_row] ) { my $dim = scalar(@{$lm_matrix_object->[$index_row]}) ; if ($dim > 1) { warn "the add method can't manage more than one column\n" ;} if (defined $lm_matrix_object->[$index_row][$dim-1]) { my $lm_col = $lm_matrix_object->[$index_row][$dim-1] ; push (@init_row, $lm_col) ; } $index_row++ ; } push (@output_matrix_object, \@init_row) ; } return(\@output_matrix_object) ; } ## END of SUB =head2 METHOD write_csv_skel ## Description : prepare and write csv output file ## Input : $csv_file, $rows ## Output : $csv_file ## Usage : my ( $csv_file ) = write_csv_skel( $csv_file, $rows ) ; =cut ## START of SUB sub write_csv_skel { ## Retrieve Values my $self = shift ; my ( $csv_file, $rows ) = @_ ; my $ocsv = lib::csv::new() ; my $csv = $ocsv->get_csv_object("\t") ; $ocsv->write_csv_from_arrays($csv, $$csv_file, $rows) ; return($csv_file) ; } ## END of SUB =head2 METHOD write_full_excel_like ## Description : allows to print a tsv file ## Input : $input_matrix_object, $sep, $masses, $mz_delta_type, $mz_delta, $col_mzdb, $rts, $rt_delta, $col_rtdb, $results, $file, $nb_header, $bank_head, $bank_name ## Output : N/A ## Usage : write_full_excel_like( $input_matrix_object, $sep, $masses, $mz_delta_type, $mz_delta, $col_mzdb, $rts, $rt_delta, $col_rtdb, $results, $file, $nb_header, $bank_head, $bank_name ) ; =cut ## START of SUB sub write_full_excel_like { ## Retrieve Values my $self = shift ; my ( $input_matrix_object, $sep, $masses, $mz_delta_type, $mz_delta, $col_mzdb, $rts, $rt_delta, $col_rtdb, $results, $file, $nb_header, $bank_head, $bank_name ) = @_ ; open(CSV, '>:utf8', $file) or die "Cant' create the file $file\n" ; my $line = 0 ; my $index_mz = 0 ; $col_mzdb -= 1; $col_rtdb -= 1; ## conversion in array number foreach my $row ( @{$input_matrix_object} ) { my $join_row = join($sep, @$row); if ( defined $nb_header ){ $line++; if ( $line < $nb_header ) { print CSV $join_row."\n"; next ; } elsif ( $line == $nb_header ){ my $head = join("_".$bank_name.$sep, @$bank_head); print CSV $join_row.$sep.$head."_".$bank_name."\n"; next ; } } my $mass = $masses->[$index_mz] ; my $results4mass = $results->[$index_mz]; if ( ref($results4mass) eq 'ARRAY' and defined $results4mass and $results4mass ne [] and $#$results4mass>=0) { ## an requested id has a result in the list of array $results. foreach my $entry (@{$results->[$index_mz]}) { print CSV $join_row."\t"; my $format_float = Math::BigFloat->new($entry->[$col_mzdb]); ## requires "use Math::BigFloat;" my $delta_mass = abs( $format_float-$mass ); ## management problem on small float print CSV $delta_mass."\t".$entry->[$col_mzdb]; if ( ( defined $rts ) and ( $rts ne "" ) ) { my $rt = $rts->[$index_mz] ; my $format_float_rt = Math::BigFloat->new($entry->[$col_rtdb]); ## requires "use Math::BigFloat;" my $delta_rt = abs( $format_float_rt-$rt ); ## management problem on small float print CSV "\t".$rt."\t".$delta_rt."\t".$entry->[$col_rtdb]; } for (my $i=0; $i<=$#$entry; $i++){ print CSV "\t".$entry->[$i]; } print CSV "\n"; } } else { print CSV $join_row."\tno results found"; for (my $i=0; $i<$#$bank_head; $i++){ print CSV "\t"; } print CSV "\n"; } $index_mz++ ; } close(CSV) ; return() ; } ## END of SUB =head2 METHOD write_excel_like_mass ## Description : allows to print a tsv file if retention time is required ## Input : $masses, $ids, $mz_delta_type, $mz_delta, $col_mzdb, $rts, $rt_delta, $col_rtdb, $results, $file, $bank_head ## Output : N/A ## Usage : write_excel_like_mass( $masses, $ids, $mz_delta_type, $mz_delta, $col_mzdb, $rts, $rt_delta, $col_rtdb, $results, $file, $bank_head ) ; =cut ## START of SUB sub write_excel_like_mass { ## Retrieve Values my $self = shift ; my ( $masses, $mz_delta_type, $mz_delta, $col_mzdb, $rts, $rt_delta, $col_rtdb, $results, $file, $out_head ) = @_ ; open(CSV, '>:utf8', $file) or die "Cant' create the file $file\n" ; my $index_mz = 0 ; my @bank_head = @$out_head; $col_mzdb -= 1; ## conversion in array number if ( ( defined $rts ) and ( $rts ne "" ) ) { splice (@bank_head, 2, 0, "RT_Submit"); $col_rtdb -= 1; ## conversion in array number } my $head = join("\t", @bank_head); print CSV "MASS_Submit\t".$head."\n" ; foreach my $mass (@{$masses}) { my $results4mass = $results->[$index_mz]; if ( ref($results4mass) eq 'ARRAY' and defined $results4mass and $results4mass ne [] and $#$results4mass>=0) { ## an requested id has a result in the list of array $results. foreach my $entry (@{$results->[$index_mz]}) { print CSV $mass."\t"; my $format_float = Math::BigFloat->new($entry->[$col_mzdb]); ## requires "use Math::BigFloat;" my $delta_mass = abs( $format_float-$mass ); ## management problem on small float print CSV $delta_mass."\t".$entry->[$col_mzdb]; if ( ( defined $rts ) and ( $rts ne "" ) ) { my $rt = $rts->[$index_mz] ; my $format_float_rt = Math::BigFloat->new($entry->[$col_rtdb]); ## requires "use Math::BigFloat;" my $delta_rt = abs( $format_float_rt-$rt ); ## management problem on small float print CSV "\t".$rt."\t".$delta_rt."\t".$entry->[$col_rtdb]; } for (my $i=0; $i<=$#$entry; $i++){ print CSV "\t".$entry->[$i]; } print CSV "\n"; } } else { print CSV $mass."\tno results found"; for (my $i=0; $i<$#bank_head; $i++){ print CSV "\t"; } print CSV "\n"; } $index_mz++ ; } close(CSV) ; return() ; } ## END of SUB =head2 METHOD db_pforest_get_clean_range ## Description : get a clean range of mass from PeakForest and REST ## Input : $ws_host, $query, $max ## Output : $json ## Usage : $json = db_pforest_get_clean_range( $ws_host, $query, $max ) ; =cut ## START of SUB #sub db_pforest_get_clean_range { # my $self = shift; # my ( $ws_host, $query, $min, $max, $mode) = @_; # my $json = undef ; # # init ## my $ws_url = "https://rest.peakforest.org/search/compounds/monoisotopicmass/59.048/0.02"; # # $ENV{HTTPS_VERSION} = 3; # #$ENV{PERL_LWP_SSL_VERIFY_HOSTNAME} = 0 ; # # my $headers = {Accept => 'application/json', Authorization => 'Basic '}; # my $client = REST::Client->new({ # host => $ws_host, ## cert => '/path/to/ssl.crt', ## key => '/path/to/ssl.key', ## ca => '/path/to/ca.file', # timeout => 100, # }); # my $complete_query = $query.'/'.$min.'/'.$max ; # # if (defined $mode) { # $complete_query = $complete_query.'?mode='.$mode ; # } # # # print $complete_query."\n" ; # $client->GET($complete_query , $headers); # $json = from_json ($client->responseContent()) ; # # return ($json) ; #} =head2 METHOD map_pfjson_bankobject ## Description : map PForest json with the original BiH Bank object ## Input : $json ## Output : $complete_bank, $bank_head ## Usage : ($complete_bank, $bank_heads) = map_pfjson_bankobject( json ) ; =cut ## START of SUB sub map_pfjson_bankobject { my $self = shift; my ( $json ) = @_; my ( %complete_bank ) = () ; my ( @bank_head ) = ('id', 'mz') ; foreach my $cpd (@$json) { $complete_bank{$cpd->{'mz'}} = [] ; my @tmp = @{$cpd->{'cpds'}} ; push ( @tmp, $cpd->{'mz'} ) ; push ( @{ $complete_bank{$cpd->{'mz'} } }, \@tmp ) ; } return (\%complete_bank, \@bank_head) ; } ## END of SUB 1 ; __END__ =head1 SUPPORT You can find documentation for this module with the perldoc command. perldoc bih.pm =head1 Exports =over 4 =item :ALL is prepare_multi_masses_query =back =head1 AUTHOR Marion Landi E<lt>marion.landi@clermont.inra.frE<gt> =head1 LICENSE This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =head1 VERSION version 1 : 19 / 11 / 2014 =cut