comparison hmdb/lib/hmdb.pm @ 0:9583f9772198 draft

Init and uploaded
author fgiacomoni
date Thu, 28 Jan 2016 10:52:26 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:9583f9772198
1 package lib::hmdb ;
2
3 use strict;
4 use warnings ;
5 use Exporter ;
6 use Carp ;
7
8 use LWP::Simple;
9 use LWP::UserAgent;
10 use URI::URL;
11 use SOAP::Lite;
12 use Encode;
13 use HTML::Template ;
14
15 use Data::Dumper ;
16
17 use vars qw($VERSION @ISA @EXPORT %EXPORT_TAGS);
18
19 our $VERSION = "1.0";
20 our @ISA = qw(Exporter);
21 our @EXPORT = qw( extract_sub_mz_lists test_matches_from_hmdb_ua prepare_multi_masses_query get_matches_from_hmdb_ua parse_hmdb_csv_results set_html_tbody_object add_mz_to_tbody_object add_entries_to_tbody_object write_html_skel set_lm_matrix_object set_hmdb_matrix_object_with_ids add_lm_matrix_to_input_matrix write_csv_skel write_csv_one_mass );
22 our %EXPORT_TAGS = ( ALL => [qw( extract_sub_mz_lists test_matches_from_hmdb_ua prepare_multi_masses_query get_matches_from_hmdb_ua parse_hmdb_csv_results set_html_tbody_object add_mz_to_tbody_object add_entries_to_tbody_object write_html_skel set_lm_matrix_object set_hmdb_matrix_object_with_ids add_lm_matrix_to_input_matrix write_csv_skel write_csv_one_mass )] );
23
24 =head1 NAME
25
26 My::Module - An example module
27
28 =head1 SYNOPSIS
29
30 use My::Module;
31 my $object = My::Module->new();
32 print $object->as_string;
33
34 =head1 DESCRIPTION
35
36 This module does not really exist, it
37 was made for the sole purpose of
38 demonstrating how POD works.
39
40 =head1 METHODS
41
42 Methods are :
43
44 =head2 METHOD new
45
46 ## Description : new
47 ## Input : $self
48 ## Ouput : bless $self ;
49 ## Usage : new() ;
50
51 =cut
52
53 sub new {
54 ## Variables
55 my $self={};
56 bless($self) ;
57 return $self ;
58 }
59 ### END of SUB
60
61
62 =head2 METHOD extract_sub_mz_lists
63
64 ## Description : extract a couples of sublist from a long mz list (more than $HMDB_LIMITS)
65 ## Input : $HMDB_LIMITS, $masses
66 ## Output : $sublists
67 ## Usage : my ( $sublists ) = extract_sub_mz_lists( $HMDB_LIMITS, $masses ) ;
68
69 =cut
70 ## START of SUB
71 sub extract_sub_mz_lists {
72 ## Retrieve Values
73 my $self = shift ;
74 my ( $masses, $HMDB_LIMITS ) = @_ ;
75
76 my ( @sublists, @sublist ) = ( (), () ) ;
77 my $nb_mz = 0 ;
78 my $nb_total_mzs = scalar(@{$masses}) ;
79
80 if ($nb_total_mzs == 0) {
81 die "The provided mzs list is empty" ;
82 }
83
84 for ( my $current_pos = 0 ; $current_pos < $nb_total_mzs ; $current_pos++ ) {
85
86 if ( $nb_mz < $HMDB_LIMITS ) {
87 if ( $masses->[$current_pos] ) { push (@sublist, $masses->[$current_pos]) ; $nb_mz++ ; } # build sub list
88 }
89 elsif ( $nb_mz == $HMDB_LIMITS ) {
90 my @tmp = @sublist ; push (@sublists, \@tmp) ; @sublist = () ; $nb_mz = 0 ;
91 $current_pos-- ;
92 }
93 if ($current_pos == $nb_total_mzs-1) { my @tmp = @sublist ; push (@sublists, \@tmp) ; }
94 }
95 return(\@sublists) ;
96 }
97 ## END of SUB
98
99 =head2 METHOD prepare_multi_masses_query
100
101 ## Description : Generate the adapted format of the mz list for HMDB
102 ## Input : $masses
103 ## Output : $hmdb_masses
104 ## Usage : my ( $hmdb_masses ) = prepare_multi_masses_query( $masses ) ;
105
106 =cut
107 ## START of SUB
108 sub prepare_multi_masses_query {
109 ## Retrieve Values
110 my $self = shift ;
111 my ( $masses ) = @_ ;
112
113 my $hmdb_masses = undef ;
114 my $sep = '%0D%0A' ; ## retour chariot encode
115 my ($nb_masses, $i) = (0, 0) ;
116
117 if ( defined $masses ) {
118 my @masses = @{$masses} ;
119 my $nb_masses = scalar ( @masses ) ;
120 if ( $nb_masses == 0 ) { croak "The input method parameter mass list is empty" ; }
121 elsif ( $nb_masses >= 150 ) { croak "Your mass list is too long : HMDB allows maximum 150 query masses per request \n" ; } ## Del it --- temporary patch
122
123 foreach my $mass (@masses) {
124
125 if ($i < $nb_masses) {
126 $hmdb_masses .= $mass.$sep ;
127 }
128 elsif ( $i == $nb_masses ) {
129 $hmdb_masses .= $mass ;
130 }
131 else {
132 last ;
133 }
134 $i ++ ;
135 }
136 }
137 else {
138 croak "No mass list found \n" ;
139 }
140 return($hmdb_masses, $nb_masses) ;
141 }
142 ## END of SUB
143
144 =head2 METHOD test_matches_from_hmdb_ua
145
146 ## Description : test a single query with tests parameters on hmdb - get the status of the complete server infra.
147 ## Input : none
148 ## Output : $status_line
149 ## Usage : my ( $status_line ) = test_matches_from_hmdb_ua( ) ;
150
151 =cut
152 ## START of SUB
153 sub test_matches_from_hmdb_ua {
154 ## Retrieve Values
155 my $self = shift ;
156
157 my @page = () ;
158
159 my $ua = new LWP::UserAgent;
160 $ua->agent("Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36");
161
162 my $req = HTTP::Request->new(
163 POST => 'http://specdb.wishartlab.com/ms/search.csv');
164
165 $req->content_type('application/x-www-form-urlencoded');
166 $req->content('utf8=TRUE&mode=positive&query_masses=420.159317&tolerance=0.000001&database=HMDB&commit=Download Results As CSV');
167
168 my $res = $ua->request($req);
169 # print $res->as_string;
170 my $status_line = $res->status_line ;
171 ($status_line) = ($status_line =~ /(\d+)/);
172
173
174 return (\$status_line) ;
175 }
176 ## END of SUB
177
178 =head2 METHOD check_state_from_hmdb_ua
179
180 ## Description : check the thhp status of hmdb and kill correctly the script if necessary.
181 ## Input : $status
182 ## Output : none
183 ## Usage : check_state_from_hmdb_ua($status) ;
184
185 =cut
186 ## START of SUB
187 sub check_state_from_hmdb_ua {
188 ## Retrieve Values
189 my $self = shift ;
190 my ($status) = @_ ;
191
192 if (!defined $$status) {
193 croak "No http status is defined for the distant server" ;
194 }
195 else {
196 unless ( $$status == 200 ) {
197 if ( $$status == 504 ) { croak "Gateway Timeout: The HMDB server was acting as a gateway or proxy and did not receive a timely response from the upstream server" ; }
198 else {
199 ## None supported http code error ##
200 }
201 }
202 }
203
204 return (1) ;
205 }
206 ## END of SUB
207
208 =head2 METHOD get_matches_from_hmdb_ua
209
210 ## Description : HMDB querying via an user agent with parameters : mz, delta and molecular species (neutral, pos, neg)
211 ## Input : $mass, $delta, $mode
212 ## Output : $results
213 ## Usage : my ( $results ) = get_matches_from_hmdb( $mass, $delta, $mode ) ;
214
215 =cut
216 ## START of SUB
217 sub get_matches_from_hmdb_ua {
218 ## Retrieve Values
219 my $self = shift ;
220 my ( $masses, $delta, $mode ) = @_ ;
221
222 my @page = () ;
223
224 my $ua = new LWP::UserAgent;
225 $ua->agent("Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36");
226
227 my $req = HTTP::Request->new(
228 POST => 'http://specdb.wishartlab.com/ms/search.csv');
229
230 $req->content_type('application/x-www-form-urlencoded');
231 $req->content('utf8=TRUE&mode='.$mode.'&query_masses='.$masses.'&tolerance='.$delta.'&database=HMDB&commit=Download Results As CSV');
232
233 my $res = $ua->request($req);
234 # print $res->as_string;
235 if ($res->is_success) {
236 @page = split ( /\n/, $res->decoded_content ) ;
237 } else {
238 my $status_line = $res->status_line ;
239 ($status_line) = ($status_line =~ /(\d+)/);
240 croak "HMDB service none available !! Status of the HMDB server is : $status_line\n" ;
241 }
242
243
244 return (\@page) ;
245 }
246 ## END of SUB
247
248 =head2 METHOD parse_hmdb_csv_results
249
250 ## Description : parse the csv results and get data
251 ## Input : $csv
252 ## Output : $results
253 ## Usage : my ( $results ) = parse_hmdb_csv_results( $csv ) ;
254
255 =cut
256 ## START of SUB
257 sub parse_hmdb_csv_results {
258 ## Retrieve Values
259 my $self = shift ;
260 my ( $csv, $masses ) = @_ ;
261
262 my $test = 0 ;
263 my ($query_mass,$compound_id,$formula,$compound_mass,$adduct,$adduct_type,$adduct_mass,$delta) = (0, undef, undef, undef, undef, undef, undef, undef) ;
264
265 my %result_by_entry = () ;
266 my %features = () ;
267
268 # print Dumper $csv ;
269
270 foreach my $line (@{$csv}) {
271
272 if ($line !~ /query_mass,compound_id,formula,compound_mass,adduct,adduct_type,adduct_mass,delta/) {
273 my @entry = split(/,/, $line) ;
274
275 if ( !exists $result_by_entry{$entry[0]} ) { $result_by_entry{$entry[0]} = [] ; }
276
277 $features{ENTRY_ENTRY_ID} = $entry[1] ;
278 $features{ENTRY_FORMULA} = $entry[2] ;
279 $features{ENTRY_CPD_MZ} = $entry[3] ;
280 $features{ENTRY_ADDUCT} = $entry[4] ;
281 $features{ENTRY_ADDUCT_TYPE} = $entry[5] ;
282 $features{ENTRY_ADDUCT_MZ} = $entry[6] ;
283 $features{ENTRY_DELTA} = $entry[7] ;
284
285 my %temp = %features ;
286
287 push (@{$result_by_entry{$entry[0]} }, \%temp) ;
288 }
289 else {
290 next ;
291 }
292 } ## end foreach
293
294 ## manage per query_mzs (keep query masses order by array)
295 my @results = () ;
296 foreach (@{$masses}) {
297 if ($result_by_entry{$_}) { push (@results, $result_by_entry{$_}) ; }
298 else {push (@results, [] ) ;} ;
299 }
300 return(\@results) ;
301 }
302 ## END of SUB
303
304 =head2 METHOD parse_hmdb_page_results
305
306 ## Description : [DEPRECATED] old HMDB html page parser
307 ## Input : $page
308 ## Output : $results
309 ## Usage : my ( $results ) = parse_hmdb_page_result( $pages ) ;
310
311 =cut
312 ## START of SUB
313 sub parse_hmdb_page_results {
314 ## Retrieve Values
315 my $self = shift ;
316 my ( $page ) = @_ ;
317
318 my @results = () ;
319 my ($catch_table, $catch_name) = (0, 0) ;
320 my ($name, $adduct, $adduct_mw, $cpd_mw, $delta) = (undef, undef, undef, undef, undef) ;
321
322 if ( defined $page ) {
323
324 my @page = @{$page} ;
325 my $ID = undef ;
326 my @result_by_mz = () ;
327 my %result_by_entry = () ;
328
329 foreach my $line (@page) {
330
331 #Section de la page contenant les resultat
332 if( $line =~/<table>/ ) { $catch_table = 1 ; }
333
334 ## Si il existe un resultat :
335 if($catch_table == 1) {
336
337 #Id de la molecule, et creation du lien
338 if( $line =~ /<a href=\"\/metabolites\/(\w+)\" (.*)>/ ) {
339 $ID = $1 ;
340 $catch_name = 0 ;
341 next ;
342 }
343 #Nom de la molecule ONLY!!
344 if ( $catch_name == 0 ) {
345
346 if( $line =~ /<td>(.+)<\/td>/ ) {
347
348 if ( !defined $name ) {
349 $name = $1 ;
350 $result_by_entry{'ENTRY_ENTRY_ID'} = $ID ;
351 $result_by_entry{'ENTRY_NAME'} = $name ;
352 next ;
353 }
354 if ( !defined $adduct ) { $adduct = $1 ; $result_by_entry{'ENTRY_ADDUCT'} = $adduct ; next ; }
355 if ( !defined $adduct_mw ) { $adduct_mw = $1 ; $result_by_entry{'ENTRY_ADDUCT_MZ'} = $adduct_mw ; next ; }
356 if ( !defined $cpd_mw ) { $cpd_mw = $1 ; $result_by_entry{'ENTRY_CPD_MZ'} = $cpd_mw ; next ; }
357 if ( !defined $delta ) {
358 $delta = $1 ;
359 $result_by_entry{'ENTRY_DELTA'} = $delta ;
360 $catch_name = 1 ;
361 my %tmp = %result_by_entry ;
362 push (@result_by_mz, \%tmp) ;
363 %result_by_entry = () ;
364 ( $name, $cpd_mw, $delta, $adduct, $adduct_mw ) = ( undef, undef, undef, undef, undef ) ;
365 next ;
366 }
367 }
368 }
369 }
370 #Fin de la section contenant les resultats
371 if( $line =~ /<\/table>/ ) {
372 $catch_table = 0 ;
373 my @Tmp = @result_by_mz ;
374 push(@results, \@Tmp) ;
375 @result_by_mz = () ;
376 }
377 }
378 }
379 return(\@results) ;
380 }
381 ## END of SUB
382
383 =head2 METHOD set_html_tbody_object
384
385 ## Description : initializes and build the tbody object (perl array) needed to html template
386 ## Input : $nb_pages, $nb_items_per_page
387 ## Output : $tbody_object
388 ## Usage : my ( $tbody_object ) = set_html_tbody_object($nb_pages, $nb_items_per_page) ;
389
390 =cut
391 ## START of SUB
392 sub set_html_tbody_object {
393 my $self = shift ;
394 my ( $nb_pages, $nb_items_per_page ) = @_ ;
395
396 my ( @tbody_object ) = ( ) ;
397
398 for ( my $i = 1 ; $i <= $nb_pages ; $i++ ) {
399
400 my %pages = (
401 # tbody feature
402 PAGE_NB => $i,
403 MASSES => [], ## end MASSES
404 ) ; ## end TBODY N
405 push (@tbody_object, \%pages) ;
406 }
407 return(\@tbody_object) ;
408 }
409 ## END of SUB
410
411 =head2 METHOD add_mz_to_tbody_object
412
413 ## Description : initializes and build the mz object (perl array) needed to html template
414 ## Input : $tbody_object, $nb_items_per_page, $mz_list
415 ## Output : $tbody_object
416 ## Usage : my ( $tbody_object ) = add_mz_to_tbody_object( $tbody_object, $nb_items_per_page, $mz_list ) ;
417
418 =cut
419 ## START of SUB
420 sub add_mz_to_tbody_object {
421 my $self = shift ;
422 my ( $tbody_object, $nb_items_per_page, $mz_list, $ids_list ) = @_ ;
423
424 my ( $current_page, $mz_index ) = ( 0, 0 ) ;
425
426 foreach my $page ( @{$tbody_object} ) {
427
428 my @colors = ('white', 'green') ;
429 my ( $current_index, , $icolor ) = ( 0, 0 ) ;
430
431 for ( my $i = 1 ; $i <= $nb_items_per_page ; $i++ ) {
432 #
433 if ( $current_index > $nb_items_per_page ) { ## manage exact mz per html page
434 $current_index = 0 ;
435 last ; ##
436 }
437 else {
438 $current_index++ ;
439 if ( $icolor > 1 ) { $icolor = 0 ; }
440
441 if ( exists $mz_list->[$mz_index] ) {
442
443 my %mz = (
444 # mass feature
445 MASSES_ID_QUERY => $ids_list->[$mz_index],
446 MASSES_MZ_QUERY => $mz_list->[$mz_index],
447 MZ_COLOR => $colors[$icolor],
448 MASSES_NB => $mz_index+1,
449 ENTRIES => [] ,
450 ) ;
451 push ( @{ $tbody_object->[$current_page]{MASSES} }, \%mz ) ;
452 # Html attr for mass
453 $icolor++ ;
454 }
455 }
456 $mz_index++ ;
457 } ## foreach mz
458
459 $current_page++ ;
460 }
461 return($tbody_object) ;
462 }
463 ## END of SUB
464
465 =head2 METHOD add_entries_to_tbody_object
466
467 ## Description : initializes and build the entries object (perl array) needed to html template
468 ## Input : $tbody_object, $nb_items_per_page, $mz_list, $entries
469 ## Output : $tbody_object
470 ## Usage : my ( $tbody_object ) = add_entries_to_tbody_object( $tbody_object, $nb_items_per_page, $mz_list, $entries ) ;
471
472 =cut
473 ## START of SUB
474 sub add_entries_to_tbody_object {
475 ## Retrieve Values
476 my $self = shift ;
477 my ( $tbody_object, $nb_items_per_page, $mz_list, $entries ) = @_ ;
478
479 my $index_page = 0 ;
480 my $index_mz_continous = 0 ;
481
482 foreach my $page (@{$tbody_object}) {
483
484 my $index_mz = 0 ;
485
486 foreach my $mz (@{ $tbody_object->[$index_page]{MASSES} }) {
487
488 my $index_entry = 0 ;
489
490 my @anti_redondant = ('N/A') ;
491 my $check_rebond = 0 ;
492 my $check_noentry = 0 ;
493
494 foreach my $entry (@{ $entries->[$index_mz_continous] }) {
495 $check_noentry ++ ;
496 ## dispo anti doublons des entries
497 foreach my $rebond (@anti_redondant) {
498 if ( $rebond eq $entries->[$index_mz_continous][$index_entry]{ENTRY_ENTRY_ID} ) { $check_rebond = 1 ; last ; }
499 }
500
501 if ( $check_rebond == 0 ) {
502
503 push ( @anti_redondant, $entries->[$index_mz_continous][$index_entry]{ENTRY_ENTRY_ID} ) ;
504
505 my %entry = (
506 ENTRY_COLOR => $tbody_object->[$index_page]{MASSES}[$index_mz]{MZ_COLOR},
507 ENTRY_ENTRY_ID => $entries->[$index_mz_continous][$index_entry]{ENTRY_ENTRY_ID},
508 ENTRY_ENTRY_ID2 => $entries->[$index_mz_continous][$index_entry]{ENTRY_ENTRY_ID},
509 ENTRY_FORMULA => $entries->[$index_mz_continous][$index_entry]{ENTRY_FORMULA},
510 ENTRY_CPD_MZ => $entries->[$index_mz_continous][$index_entry]{ENTRY_CPD_MZ},
511 ENTRY_ADDUCT => $entries->[$index_mz_continous][$index_entry]{ENTRY_ADDUCT},
512 ENTRY_ADDUCT_TYPE => $entries->[$index_mz_continous][$index_entry]{ENTRY_ADDUCT_TYPE},
513 ENTRY_ADDUCT_MZ => $entries->[$index_mz_continous][$index_entry]{ENTRY_ADDUCT_MZ},
514 ENTRY_DELTA => $entries->[$index_mz_continous][$index_entry]{ENTRY_DELTA},
515 ) ;
516
517 push ( @{ $tbody_object->[$index_page]{MASSES}[$index_mz]{ENTRIES} }, \%entry) ;
518 }
519 $check_rebond = 0 ; ## reinit double control
520 $index_entry++ ;
521 } ## end foreach
522 if ($check_noentry == 0 ) {
523 my %entry = (
524 ENTRY_COLOR => $tbody_object->[$index_page]{MASSES}[$index_mz]{MZ_COLOR},
525 ENTRY_ENTRY_ID => 'No_result_found_on_HMDB',
526 ENTRY_ENTRY_ID2 => '',
527 ENTRY_FORMULA => 'n/a',
528 ENTRY_CPD_MZ => 'n/a',
529 ENTRY_ADDUCT => 'n/a',
530 ENTRY_ADDUCT_TYPE => 'n/a',
531 ENTRY_ADDUCT_MZ => 'n/a',
532 ENTRY_DELTA => 0,
533 ) ;
534 push ( @{ $tbody_object->[$index_page]{MASSES}[$index_mz]{ENTRIES} }, \%entry) ;
535 }
536 $index_mz ++ ;
537 $index_mz_continous ++ ;
538 }
539 $index_page++ ;
540 }
541 return($tbody_object) ;
542 }
543 ## END of SUB
544
545 =head2 METHOD write_html_skel
546
547 ## Description : prepare and write the html output file
548 ## Input : $html_file_name, $html_object, $html_template
549 ## Output : $html_file_name
550 ## Usage : my ( $html_file_name ) = write_html_skel( $html_file_name, $html_object ) ;
551
552 =cut
553 ## START of SUB
554 sub write_html_skel {
555 ## Retrieve Values
556 my $self = shift ;
557 my ( $html_file_name, $html_object, $pages , $search_condition, $html_template, $js_path, $css_path ) = @_ ;
558
559 my $html_file = $$html_file_name ;
560
561 if ( defined $html_file ) {
562 open ( HTML, ">$html_file" ) or die "Can't create the output file $html_file " ;
563
564 if (-e $html_template) {
565 my $ohtml = HTML::Template->new(filename => $html_template);
566 $ohtml->param( JS_GALAXY_PATH => $js_path, CSS_GALAXY_PATH => $css_path ) ;
567 $ohtml->param( CONDITIONS => $search_condition ) ;
568 $ohtml->param( PAGES_NB => $pages ) ;
569 $ohtml->param( PAGES => $html_object ) ;
570 print HTML $ohtml->output ;
571 }
572 else {
573 croak "Can't fill any html output : No template available ($html_template)\n" ;
574 }
575
576 close (HTML) ;
577 }
578 else {
579 croak "No output file name available to write HTML file\n" ;
580 }
581 return(\$html_file) ;
582 }
583 ## END of SUB
584
585 =head2 METHOD set_lm_matrix_object
586
587 ## Description : build the hmdb_row under its ref form
588 ## Input : $header, $init_mzs, $entries
589 ## Output : $hmdb_matrix
590 ## Usage : my ( $hmdb_matrix ) = set_lm_matrix_object( $header, $init_mzs, $entries ) ;
591
592 =cut
593 ## START of SUB
594 sub set_lm_matrix_object {
595 ## Retrieve Values
596 my $self = shift ;
597 my ( $header, $init_mzs, $entries ) = @_ ;
598
599 my @hmdb_matrix = () ;
600
601 if ( defined $header ) {
602 my @headers = () ;
603 push @headers, $header ;
604 push @hmdb_matrix, \@headers ;
605 }
606
607 my $index_mz = 0 ;
608
609 foreach my $mz ( @{$init_mzs} ) {
610
611 my $index_entries = 0 ;
612 my @clusters = () ;
613 my $cluster_col = undef ;
614
615 my @anti_redondant = ('N/A') ;
616 my $check_rebond = 0 ;
617
618 my $nb_entries = scalar (@{ $entries->[$index_mz] }) ;
619
620 foreach my $entry (@{ $entries->[$index_mz] }) {
621
622 ## dispo anti doublons des entries
623 foreach my $rebond (@anti_redondant) {
624 if ( $rebond eq $entries->[$index_mz][$index_entries]{ENTRY_ENTRY_ID} ) { $check_rebond = 1 ; last ; }
625 }
626
627 if ( $check_rebond == 0 ) {
628
629 push ( @anti_redondant, $entries->[$index_mz][$index_entries]{ENTRY_ENTRY_ID} ) ;
630
631 my $delta = $entries->[$index_mz][$index_entries]{ENTRY_DELTA} ;
632 my $formula = $entries->[$index_mz][$index_entries]{ENTRY_FORMULA} ;
633 my $hmdb_id = $entries->[$index_mz][$index_entries]{ENTRY_ENTRY_ID} ;
634
635 ## METLIN data display model
636 ## entry1=VAR1::VAR2::VAR3::VAR4|entry2=VAR1::VAR2::VAR3::VAR4|...
637 # manage final pipe
638 if ($index_entries < $nb_entries-1 ) { $cluster_col .= $delta.'::('.$formula.')::'.$hmdb_id.'|' ; }
639 else { $cluster_col .= $delta.'::('.$formula.')::'.$hmdb_id ; }
640
641 }
642 $check_rebond = 0 ; ## reinit double control
643 $index_entries++ ;
644 } ## end foreach
645 if ( !defined $cluster_col ) { $cluster_col = 'No_result_found_on_HMDB' ; }
646 push (@clusters, $cluster_col) ;
647 push (@hmdb_matrix, \@clusters) ;
648 $index_mz++ ;
649 }
650 return(\@hmdb_matrix) ;
651 }
652 ## END of SUB
653
654 =head2 METHOD set_hmdb_matrix_object_with_ids
655
656 ## Description : build the hmdb_row under its ref form (IDS only)
657 ## Input : $header, $init_mzs, $entries
658 ## Output : $hmdb_matrix
659 ## Usage : my ( $hmdb_matrix ) = set_hmdb_matrix_object_with_ids( $header, $init_mzs, $entries ) ;
660
661 =cut
662 ## START of SUB
663 sub set_hmdb_matrix_object_with_ids {
664 ## Retrieve Values
665 my $self = shift ;
666 my ( $header, $init_mzs, $entries ) = @_ ;
667
668 my @hmdb_matrix = () ;
669
670 if ( defined $header ) {
671 my @headers = () ;
672 push @headers, $header ;
673 push @hmdb_matrix, \@headers ;
674 }
675
676 my $index_mz = 0 ;
677
678 foreach my $mz ( @{$init_mzs} ) {
679
680 my $index_entries = 0 ;
681 my @clusters = () ;
682 my $cluster_col = undef ;
683
684 my @anti_redondant = ('N/A') ;
685 my $check_rebond = 0 ;
686
687 my $nb_entries = scalar (@{ $entries->[$index_mz] }) ;
688
689 foreach my $entry (@{ $entries->[$index_mz] }) {
690
691 ## dispo anti doublons des entries
692 foreach my $rebond (@anti_redondant) {
693 if ( $rebond eq $entries->[$index_mz][$index_entries]{ENTRY_ENTRY_ID} ) { $check_rebond = 1 ; last ; }
694 }
695
696 if ( $check_rebond == 0 ) {
697
698 push ( @anti_redondant, $entries->[$index_mz][$index_entries]{ENTRY_ENTRY_ID} ) ;
699 my $hmdb_id = $entries->[$index_mz][$index_entries]{ENTRY_ENTRY_ID} ;
700
701 ## METLIN data display model -- IDs ONLY !!
702 ## entry1=VAR1::VAR2::VAR3::VAR4|entry2=VAR1::VAR2::VAR3::VAR4|...
703 # manage final pipe
704 if ($index_entries < $nb_entries-1 ) { $cluster_col .= $hmdb_id.'|' ; }
705 else { $cluster_col .= $hmdb_id ; }
706
707 }
708 $check_rebond = 0 ; ## reinit double control
709 $index_entries++ ;
710 } ## end foreach
711 if ( !defined $cluster_col ) { $cluster_col = 'No_result_found_on_HMDB' ; }
712 push (@clusters, $cluster_col) ;
713 push (@hmdb_matrix, \@clusters) ;
714 $index_mz++ ;
715 }
716 return(\@hmdb_matrix) ;
717 }
718 ## END of SUB
719
720 =head2 METHOD add_lm_matrix_to_input_matrix
721
722 ## Description : build a full matrix (input + lm column)
723 ## Input : $input_matrix_object, $lm_matrix_object, $nb_header
724 ## Output : $output_matrix_object
725 ## Usage : my ( $output_matrix_object ) = add_lm_matrix_to_input_matrix( $input_matrix_object, $lm_matrix_object, $nb_header ) ;
726
727 =cut
728 ## START of SUB
729 sub add_lm_matrix_to_input_matrix {
730 ## Retrieve Values
731 my $self = shift ;
732 my ( $input_matrix_object, $lm_matrix_object, $nb_header ) = @_ ;
733
734 my @output_matrix_object = () ;
735 my $index_row = 0 ;
736 my $line = 0 ;
737
738 foreach my $row ( @{$input_matrix_object} ) {
739 my @init_row = @{$row} ;
740 $line++;
741
742 if ( ( defined $nb_header ) and ( $line <= $nb_header) ) {
743 push (@output_matrix_object, \@init_row) ;
744 next ;
745 }
746
747 if ( $lm_matrix_object->[$index_row] ) {
748 my $dim = scalar(@{$lm_matrix_object->[$index_row]}) ;
749
750 if ($dim > 1) { warn "the add method can't manage more than one column\n" ;}
751 my $lm_col = $lm_matrix_object->[$index_row][$dim-1] ;
752
753 push (@init_row, $lm_col) ;
754 $index_row++ ;
755 }
756 push (@output_matrix_object, \@init_row) ;
757 }
758 return(\@output_matrix_object) ;
759 }
760 ## END of SUB
761
762 =head2 METHOD write_csv_skel
763
764 ## Description : prepare and write csv output file
765 ## Input : $csv_file, $rows
766 ## Output : $csv_file
767 ## Usage : my ( $csv_file ) = write_csv_skel( $csv_file, $rows ) ;
768
769 =cut
770 ## START of SUB
771 sub write_csv_skel {
772 ## Retrieve Values
773 my $self = shift ;
774 my ( $csv_file, $rows ) = @_ ;
775
776 my $ocsv = lib::csv::new() ;
777 my $csv = $ocsv->get_csv_object("\t") ;
778 $ocsv->write_csv_from_arrays($csv, $$csv_file, $rows) ;
779
780 return($csv_file) ;
781 }
782 ## END of SUB
783
784 =head2 METHOD write_csv_one_mass
785
786 ## Description : print a cvs file
787 ## Input : $masses, $ids, $results, $file
788 ## Output : N/A
789 ## Usage : write_csv_one_mass( $ids, $results, $file ) ;
790
791 =cut
792 ## START of SUB
793 sub write_csv_one_mass {
794 ## Retrieve Values
795 my $self = shift ;
796 my ( $masses, $ids, $results, $file, ) = @_ ;
797
798 open(CSV, '>:utf8', "$file") or die "Cant' create the file $file\n" ;
799 print CSV "ID\tMASS_SUBMIT\tHMDB_ID\tCPD_FORMULA\tCPD_MW\tDELTA\n" ;
800
801 my $i = 0 ;
802
803 foreach my $id (@{$ids}) {
804 my $mass = undef ;
805 if ( $masses->[$i] ) { $mass = $masses->[$i] ; }
806 else { last ; }
807
808 if ( $results->[$i] ) { ## an requested id has a result in the list of hashes $results.
809
810 my @anti_redondant = ('N/A') ;
811 my $check_rebond = 0 ;
812 my $check_noentry = 0 ;
813
814 foreach my $entry (@{$results->[$i]}) {
815 $check_noentry ++ ;
816 ## dispo anti doublons des entries
817 foreach my $rebond (@anti_redondant) {
818 if ( $rebond eq $entry->{ENTRY_ENTRY_ID} ) { $check_rebond = 1 ; last ; }
819 }
820 # print "\n-----------------------" ;
821 # print Dumper $entry->{ENTRY_ENTRY_ID} ;
822 # print "-------------------------$check_rebond\n" ;
823 # print Dumper @anti_redondant ;
824 if ( $check_rebond == 0 ) {
825
826 push ( @anti_redondant, $entry->{ENTRY_ENTRY_ID} ) ;
827
828 print CSV "$id\t$mass\t$entry->{ENTRY_ENTRY_ID}\t" ;
829 ## print cpd name
830 if ( $entry->{ENTRY_FORMULA} ) { print CSV "$entry->{ENTRY_FORMULA}\t" ; }
831 else { print CSV "N/A\t" ; }
832 ## print cpd mw
833 if ( $entry->{ENTRY_CPD_MZ} ) { print CSV "$entry->{ENTRY_CPD_MZ}\t" ; }
834 else { print CSV "N/A\t" ; }
835 ## print delta
836 if ( $entry->{ENTRY_DELTA} ) { print CSV "$entry->{ENTRY_DELTA}\n" ; }
837 else { print CSV "N/A\n" ; }
838 }
839 $check_rebond = 0 ; ## reinit double control
840 } ## end foreach
841 if ($check_noentry == 0 ) {
842 print CSV "$id\t$mass\t".'No_result_found_on_HMDB'."\tn/a\tn/a\t0\n" ;
843 }
844 }
845 $i++ ;
846 }
847 close(CSV) ;
848 return() ;
849 }
850 ## END of SUB
851
852 1 ;
853
854
855 __END__
856
857 =head1 SUPPORT
858
859 You can find documentation for this module with the perldoc command.
860
861 perldoc hmdb.pm
862
863 =head1 Exports
864
865 =over 4
866
867 =item :ALL is ...
868
869 =back
870
871 =head1 AUTHOR
872
873 Franck Giacomoni E<lt>franck.giacomoni@clermont.inra.frE<gt>
874
875 =head1 LICENSE
876
877 This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
878
879 =head1 VERSION
880
881 version 1 : 06 / 06 / 2013
882
883 version 2 : 27 / 01 / 2014
884
885 version 3 : 19 / 11 / 2014
886
887 version 4 : 28 / 01 / 2016
888
889 =cut