Mercurial > repos > fgiacomoni > massbank_ws_searchspectrum
comparison lib/mapper.pm @ 0:023c380900ef draft default tip
Init repository with last massbank_ws_searchspectrum master version
author | fgiacomoni |
---|---|
date | Wed, 19 Apr 2017 11:31:58 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:023c380900ef |
---|---|
1 package lib::mapper ; | |
2 | |
3 use strict; | |
4 use warnings ; | |
5 use Exporter ; | |
6 use Carp ; | |
7 | |
8 use Data::Dumper ; | |
9 | |
10 use vars qw($VERSION @ISA @EXPORT %EXPORT_TAGS); | |
11 | |
12 our $VERSION = "1.0"; | |
13 our @ISA = qw(Exporter); | |
14 our @EXPORT = qw( add_min_max_for_pcgroup_res get_massbank_records_by_chunk compute_ids_from_pcgroups_res filter_pcgroup_res get_pcgroup_list get_pcgroups set_massbank_matrix_object add_massbank_matrix_to_input_matrix map_pc_to_generic_json set_html_tbody_object add_mz_to_tbody_object add_entries_to_tbody_object); | |
15 our %EXPORT_TAGS = ( ALL => [qw( add_min_max_for_pcgroup_res get_massbank_records_by_chunk compute_ids_from_pcgroups_res filter_pcgroup_res get_pcgroup_list get_pcgroups set_massbank_matrix_object add_massbank_matrix_to_input_matrix map_pc_to_generic_json set_html_tbody_object add_mz_to_tbody_object add_entries_to_tbody_object)] ); | |
16 | |
17 =head1 NAME | |
18 | |
19 My::Module - An example module | |
20 | |
21 =head1 SYNOPSIS | |
22 | |
23 use My::Module; | |
24 my $object = My::Module->new(); | |
25 print $object->as_string; | |
26 | |
27 =head1 DESCRIPTION | |
28 | |
29 This module does not really exist, it | |
30 was made for the sole purpose of | |
31 demonstrating how POD works. | |
32 | |
33 =head1 METHODS | |
34 | |
35 Methods are : | |
36 | |
37 =head2 METHOD new | |
38 | |
39 ## Description : new | |
40 ## Input : $self | |
41 ## Ouput : bless $self ; | |
42 ## Usage : new() ; | |
43 | |
44 =cut | |
45 | |
46 sub new { | |
47 ## Variables | |
48 my $self={}; | |
49 bless($self) ; | |
50 return $self ; | |
51 } | |
52 ### END of SUB | |
53 | |
54 =head2 METHOD get_pcgroups | |
55 | |
56 ## Description : get and prepare pcgroup features (mzs, into, names) from input cvs parser | |
57 ## Input : $pcs, $mzs, $ints, $names | |
58 ## Output : $pcgroups | |
59 ## Usage : my ( $pcgroups ) = get_pcgroups( $pcs, $mzs, $ints, $names ) ; | |
60 | |
61 =cut | |
62 ## START of SUB | |
63 sub get_pcgroups { | |
64 my $self = shift; | |
65 my ( $pcs, $mzs, $ints ) = @_; | |
66 | |
67 my %pcgroups = () ; | |
68 my $i = 0 ; | |
69 | |
70 ## Warn diff matrix dimension : | |
71 my $num_pcs = scalar(@{$pcs}) ; | |
72 my $num_mzs = scalar(@{$mzs}) ; | |
73 my $num_ints = scalar(@{$ints}) ; | |
74 | |
75 if ( ($num_pcs == $num_mzs ) and ( $num_mzs == $num_ints ) ) { | |
76 my @pcs = @{$pcs} ; | |
77 | |
78 foreach my $pc (@{$pcs}) { | |
79 | |
80 if ( ! $pcgroups{$pc} ) { $pcgroups{$pc}->{'id'} = $pc ; $pcgroups{$pc}->{'annotation'} = {} ; $pcgroups{$pc}->{'massbank_ids'} = [] ; } | |
81 | |
82 push (@{$pcgroups{$pc}->{'mzmed'}}, $mzs->[$i]) if ($mzs->[$i]) ; ## map mzs by pcgroup | |
83 | |
84 if ($ints->[$i] > 0 ) { push (@{$pcgroups{$pc}->{'into'}}, $ints->[$i]) ; ## map into by pcgroup | |
85 } | |
86 elsif ($ints->[$i] == 0) { | |
87 push (@{$pcgroups{$pc}->{'into'}}, $ints->[$i]) ; ## map into by pcgroup even value is 0 | |
88 } | |
89 else { | |
90 warn "Undefined value found in pcgroups array\n" ; | |
91 } | |
92 $i++ ; | |
93 } | |
94 } | |
95 else { | |
96 warn "The different ARRAYS (pcs, mzs, ints) doesn't have the same size : mapping is not possible \n!!" | |
97 } | |
98 return (\%pcgroups) ; | |
99 } | |
100 ### END of SUB | |
101 | |
102 =head2 METHOD get_pcgroup_list | |
103 | |
104 ## Description : get and prepare unik pcgroup list from input cvs parsed list | |
105 ## Input : $pcs | |
106 ## Output : $list | |
107 ## Usage : my ( $list ) = get_pcgroup_list( $pcs ) ; | |
108 | |
109 =cut | |
110 ## START of SUB | |
111 sub get_pcgroup_list { | |
112 my $self = shift; | |
113 my ( $pcs ) = @_; | |
114 | |
115 my @pcgroup_list = () ; | |
116 my $i = 0 ; | |
117 | |
118 my %hash = map { $_, 1 } @{$pcs} ; | |
119 @pcgroup_list = keys %hash; | |
120 @pcgroup_list = sort { $a <=> $b } @pcgroup_list ; | |
121 | |
122 return (\@pcgroup_list) ; | |
123 } | |
124 | |
125 ### END of SUB | |
126 | |
127 | |
128 =head2 METHOD filter_pcgroup_res | |
129 | |
130 ## Description : This method filter the results returned by massbank with a user defined score threshold | |
131 ## Input : $pcgroups, $threshold | |
132 ## Output : $pcgroups | |
133 ## Usage : my ( $pcgroups ) = filter_pcgroup_res ( $pcgroups, $threshold ) ; | |
134 | |
135 =cut | |
136 ## START of SUB | |
137 sub filter_pcgroup_res { | |
138 ## Retrieve Values | |
139 my $self = shift ; | |
140 my ( $pcgroups, $threshold ) = @_ ; | |
141 | |
142 my %temp = () ; | |
143 | |
144 if (!defined $threshold) { | |
145 $threshold = 0.5 ; ## default value | |
146 } | |
147 | |
148 if ( (defined $pcgroups) and (defined $threshold) ) { | |
149 %temp = %{$pcgroups} ; | |
150 | |
151 foreach my $pc (keys %temp) { | |
152 | |
153 if ( $temp{$pc}{'annotation'}{'num_res'} > 0 ) { | |
154 my @filtered_annot = reverse(grep { $_->{'score'} >= $threshold if ($_->{'score'}) } @{$temp{$pc}{'annotation'}{'res'}}) ; | |
155 my $new_num_res = scalar (@filtered_annot) ; | |
156 my @ids = () ; | |
157 foreach (@filtered_annot) { push (@ids, $_->{'id'} ) } | |
158 $temp{$pc}{'annotation'}{'res'} =\@filtered_annot ; | |
159 $temp{$pc}{'annotation'}{'num_res'} = $new_num_res ; | |
160 $temp{$pc}{'massbank_ids'} = \@ids ; | |
161 } | |
162 else { | |
163 warn "No result found for this pcgroup $pc\n" ; | |
164 } | |
165 } | |
166 } ## End IF | |
167 else { | |
168 warn "No pcgroup and threshold defined\n" ; | |
169 } | |
170 return (\%temp) ; | |
171 } | |
172 ### END of SUB | |
173 | |
174 =head2 METHOD add_min_max_for_pcgroup_res | |
175 | |
176 ## Description : This method add min / max value for each mzmed contained in pcgroup | |
177 ## Input : $pcgroups | |
178 ## Output : $pcgroups | |
179 ## Usage : my ( $pcgroups ) = add_min_max_for_pcgroup_res ( $pcgroups ) ; | |
180 | |
181 =cut | |
182 ## START of SUB | |
183 sub add_min_max_for_pcgroup_res { | |
184 ## Retrieve Values | |
185 my $self = shift ; | |
186 my ( $pcgroups, $delta ) = @_ ; | |
187 | |
188 my %temp = () ; | |
189 | |
190 if (!defined $delta) { | |
191 $delta = 0.01 ; ## default value | |
192 } | |
193 | |
194 if ( defined $pcgroups) { | |
195 %temp = %{$pcgroups} ; | |
196 | |
197 foreach my $pc (keys %temp) { | |
198 my %mz_intervales = () ; | |
199 if ( $temp{$pc}{'mzmed'} ) { | |
200 my @temp = @{$temp{$pc}{'mzmed'}} ; | |
201 foreach my $mz (@temp) { | |
202 my ($min, $max) = lib::mapper::new->min_and_max_from_double_with_delta($mz, 'Da', $delta); | |
203 $mz_intervales{$mz} = {'min' => $min, 'max' => $max } ; | |
204 } | |
205 } | |
206 else { | |
207 warn "No mzmed found for this pcgroup\n" ; | |
208 } | |
209 $temp{$pc}{'intervales'} = \%mz_intervales ; | |
210 | |
211 } | |
212 } ## End IF | |
213 else { | |
214 warn "No pcgroup and threshold defined\n" ; | |
215 } | |
216 return (\%temp) ; | |
217 } | |
218 ### END of SUB | |
219 | |
220 | |
221 | |
222 =head2 METHOD min_and_max_from_double_with_delta | |
223 | |
224 ## Description : returns the minimum and maximum double according to the delta | |
225 ## Input : \$double, \$delta_type, \$delta | |
226 ## Output : \$min, \$max | |
227 ## Usage : ($min, $max)= min_and_max_from_double_with_delta($double, $delta_type, $mz_delta) ; | |
228 | |
229 =cut | |
230 ## START of SUB | |
231 sub min_and_max_from_double_with_delta { | |
232 ## Retrieve Values | |
233 my $self = shift ; | |
234 my ( $double, $delta_type, $delta ) = @_ ; | |
235 my ( $min, $max ) = ( undef, undef ) ; | |
236 | |
237 if ($delta_type eq 'ppm'){ | |
238 $min = $double - ($delta * 10**-6 * $double); | |
239 $max = $double + ($delta * 10**-6 * $double) + 0.0000000001; ## it's to included the maximum value in the search | |
240 } | |
241 elsif ($delta_type eq 'Da'){ | |
242 $min = $double - $delta; | |
243 $max = $double + $delta + 0.0000000001; ## it's to included the maximum value in the search | |
244 } | |
245 else { croak "The double delta type '$delta_type' isn't a valid type !\n" ; } | |
246 | |
247 return($min, $max) ; | |
248 } | |
249 ## END of SUB | |
250 | |
251 | |
252 =head2 METHOD compute_ids_from_pcgroups_res | |
253 | |
254 ## Description : get all ids returned by massbank with sent queries and keep only unique ones. | |
255 ## Input : $pcgroups | |
256 ## Output : $unique_ids | |
257 ## Usage : my ( $unique_ids ) = compute_ids_from_pcgroups_res ( $pcgroups ) ; | |
258 | |
259 =cut | |
260 ## START of SUB | |
261 sub compute_ids_from_pcgroups_res { | |
262 ## Retrieve Values | |
263 my $self = shift ; | |
264 my ( $pcgroups ) = @_; | |
265 my ( @ids, @unique ) = ( (), () ) ; | |
266 | |
267 if ( defined $pcgroups ) { | |
268 | |
269 foreach my $pc ( keys %{$pcgroups} ) { | |
270 if ( $pcgroups->{$pc}{'massbank_ids'} ) { | |
271 push (@ids , @{ $pcgroups->{$pc}{'massbank_ids'} } ) ; | |
272 } | |
273 } | |
274 | |
275 if ( ( scalar (@ids) ) > 0 ) { | |
276 # print Dumper @ids ; | |
277 @unique = do { my %seen; grep { !$seen{$_}++ if (defined $_) } @ids }; | |
278 @unique = sort { $a cmp $b } @unique; | |
279 } | |
280 else { | |
281 @unique = () ; | |
282 } | |
283 | |
284 | |
285 } | |
286 return (\@unique) ; | |
287 } | |
288 ### END of SUB | |
289 | |
290 | |
291 =head2 METHOD get_massbank_records_by_chunk | |
292 | |
293 ## Description : get massbank records from a complete list but send queries chunk by chunk. | |
294 ## Input : $ids, $chunk_size | |
295 ## Output : $records | |
296 ## Usage : my ( $records ) = get_massbank_records_by_chunk ( $ids, $chunk_size ) ; | |
297 | |
298 =cut | |
299 ## START of SUB | |
300 sub get_massbank_records_by_chunk { | |
301 ## Retrieve Values | |
302 my $self = shift ; | |
303 my ( $server, $ids, $chunk_size ) = @_; | |
304 my ( @records, @sent_ids ) = ( (), () ) ; | |
305 | |
306 my $current = 0 ; | |
307 my $pos = 1 ; | |
308 my @temp_ids = () ; | |
309 | |
310 my $num_ids = scalar(@{$ids}) ; | |
311 # print "The number of given massbank ids is: $num_ids\n" ; | |
312 | |
313 foreach my $id (@{$ids}) { | |
314 $current++ ; | |
315 # print "$id - - $current/$num_ids) - - $pos \n" ; | |
316 | |
317 if ( ($current == $num_ids) or ($pos == $chunk_size) ) { | |
318 # print "Querying Massbank with...\n" ; | |
319 push (@temp_ids, $id) ; | |
320 ## send query | |
321 my $omassbank = lib::massbank_api->new() ; | |
322 my ($osoap) = $omassbank->selectMassBank($server) ; | |
323 my ($records) = $omassbank->getRecordInfo($osoap, \@temp_ids) ; | |
324 push (@records, @{$records}) ; | |
325 | |
326 @temp_ids = () ; | |
327 $pos = 0 ; | |
328 } | |
329 elsif ($pos < $chunk_size) { | |
330 # print "store...\n"; | |
331 push (@temp_ids, $id) ; | |
332 $pos ++ ; | |
333 } | |
334 else { | |
335 warn "Something goes wrong : out of range\n" | |
336 } | |
337 | |
338 | |
339 } | |
340 my $num_records = scalar(@records) ; | |
341 # print "The number of received massbank records is: $num_records\n" ; | |
342 return (\@records) ; | |
343 } | |
344 ### END of SUB | |
345 | |
346 =head2 METHOD set_massbank_matrix_object | |
347 | |
348 ## Description : build the massbank_row under its ref form | |
349 ## Input : $header, $init_pcs, $init_mzs, $pcgroups, $records | |
350 ## Output : $massbank_matrix | |
351 ## Usage : my ( $massbank_matrix ) = set_lm_matrix_object( $header, $init_pcs, $init_mzs, $pcgroups, $records ) ; | |
352 | |
353 =cut | |
354 ## START of SUB | |
355 sub set_massbank_matrix_object { | |
356 ## Retrieve Values | |
357 my $self = shift ; | |
358 my ( $header, $init_pcs, $init_mzs, $pcgroups, $records ) = @_ ; | |
359 my @massbank_matrix = () ; | |
360 | |
361 my $current_pos = 0 ; | |
362 | |
363 ## format massbank(score::name::mz::formula::adduct::id) | |
364 if ( defined $header ) { | |
365 $header .= '(score::name::mz::formula::adduct::id)' ; | |
366 my @headers = () ; | |
367 push @headers, $header ; | |
368 push @massbank_matrix, \@headers ; | |
369 } | |
370 | |
371 ## foreach mz of the input file | |
372 foreach my $mz (@{$init_mzs}) { | |
373 | |
374 my $nb_ids = 0 ; | |
375 my @ids = () ; | |
376 | |
377 my $pc = $init_pcs->[$current_pos] ; ## get the rigth pcgroup with maz postion in list | |
378 # print "---> Current PCGROUP is $pc\n" ; | |
379 if ( $pcgroups->{$pc}{'enrich_annotation'}{$mz} ) { | |
380 ## get record_ids | |
381 my @massbank_ids = @{ $pcgroups->{$pc}{'enrich_annotation'}{$mz} } ; ## get validated ids relative to one mz | |
382 $nb_ids = scalar (@massbank_ids) ; | |
383 # print "- - - NB RECORDS FOR MZ $mz = $nb_ids - - STATUS => \t" ; | |
384 my $massbank_ids_string = undef ; | |
385 ## manage empty array | |
386 if (!defined $nb_ids) { carp "The number of massbank ids is not defined\n" ; } | |
387 elsif ( $nb_ids > 0 ) { | |
388 ## get data from records and init_annotation | |
389 my $index_entries = 0 ; | |
390 foreach my $record_id (@massbank_ids) { | |
391 my $massbank_name = $records->{$record_id}{names}[0] ; | |
392 my $massbank_id = $record_id ; | |
393 my $massbank_formula = $records->{$record_id}{formula} ; | |
394 my $massbank_cpd_mz = $records->{$record_id}{exact_mz} ; | |
395 my $massbank_adduct = $records->{$record_id}{precursor_type} ; | |
396 my $massbank_score = 0 ; | |
397 | |
398 ## getting the score | |
399 my @filtered_records= @{ $pcgroups->{$pc}{'annotation'}{res} } ; | |
400 foreach my $record (@filtered_records) { | |
401 if ($record->{id} eq $massbank_id ) { | |
402 $massbank_score = $record->{score} ; | |
403 last ; | |
404 } | |
405 else { | |
406 next ; | |
407 } | |
408 } | |
409 | |
410 ## METLIN data display model | |
411 ## entry1= ENTRY_DELTA::ENTRY_ENTRY_NAME::ENTRY_CPD_MZ::ENTRY_FORMULA::ENTRY_ADDUCT::ENTRY_ENTRY_ID | entry2=VAR1::VAR2::VAR3::VAR4|... | |
412 my $massbank_id_string = $massbank_score.'::['."$massbank_name".']::'.$massbank_cpd_mz.'::'.$massbank_formula.'::['.$massbank_adduct.']::'.$massbank_id ; | |
413 | |
414 # manage final pipe | |
415 if ($index_entries < $nb_ids-1 ) { $massbank_ids_string .= $massbank_id_string.' | ' ; } | |
416 else { $massbank_ids_string .= $massbank_id_string ; } | |
417 $index_entries++; | |
418 } | |
419 } | |
420 elsif ( $nb_ids == 0 ) { $massbank_ids_string = 'NONE' ; } | |
421 else { | |
422 $massbank_ids_string = 'NONE' ; | |
423 } | |
424 # print "$massbank_ids_string\n" ; | |
425 push (@ids, $massbank_ids_string) ; | |
426 } ## End if | |
427 else { | |
428 next; | |
429 } | |
430 $current_pos++ ; | |
431 | |
432 push (@massbank_matrix, \@ids) ; | |
433 } ## End foreach mz | |
434 # print "* * * * Start of the MATRIX: * * * *\n" ; | |
435 # print Dumper @massbank_matrix ; | |
436 # print "* * * * END of the MATRIX * * * *\n" ; | |
437 return(\@massbank_matrix) ; | |
438 } | |
439 ## END of SUB | |
440 | |
441 =head2 METHOD add_massbank_matrix_to_input_matrix | |
442 | |
443 ## Description : build a full matrix (input + lm column) | |
444 ## Input : $input_matrix_object, $massbank_matrix_object | |
445 ## Output : $output_matrix_object | |
446 ## Usage : my ( $output_matrix_object ) = add_massbank_matrix_to_input_matrix( $input_matrix_object, $massbank_matrix_object ) ; | |
447 | |
448 =cut | |
449 ## START of SUB | |
450 sub add_massbank_matrix_to_input_matrix { | |
451 ## Retrieve Values | |
452 my $self = shift ; | |
453 my ( $input_matrix_object, $massbank_matrix_object ) = @_ ; | |
454 | |
455 my @output_matrix_object = () ; | |
456 my $index_row = 0 ; | |
457 | |
458 foreach my $row ( @{$input_matrix_object} ) { | |
459 my @init_row = @{$row} ; | |
460 | |
461 if ( $massbank_matrix_object->[$index_row] ) { | |
462 my $dim = scalar(@{$massbank_matrix_object->[$index_row]}) ; | |
463 | |
464 if ($dim > 1) { warn "the add method can't manage more than one column\n" ;} | |
465 my $lm_col = $massbank_matrix_object->[$index_row][$dim-1] ; | |
466 | |
467 push (@init_row, $lm_col) ; | |
468 $index_row++ ; | |
469 } | |
470 push (@output_matrix_object, \@init_row) ; | |
471 } | |
472 return(\@output_matrix_object) ; | |
473 } | |
474 ## END of SUB | |
475 | |
476 =head2 METHOD map_res_to_generic_json | |
477 | |
478 ## Description : build json structure with all massbank results | |
479 ## Input : $mzs, $pcs, $pcgroups_results | |
480 ## Output : $json_scalar | |
481 ## Usage : my ( $json_scalar ) = add_massbank_matrix_to_input_matrix( $mzs, $pcs, $pcgroups_results ) ; | |
482 | |
483 =cut | |
484 ## START of SUB | |
485 sub map_pc_to_generic_json { | |
486 my $self = shift; | |
487 my ( $pcs, $pcgroups, $records ) = @_ ; | |
488 | |
489 # print Dumper $pcgroups ; | |
490 # print Dumper $records ; | |
491 | |
492 ## JSON DESIGN | |
493 my %JSON = ( | |
494 QUERY => {}, | |
495 PARAM => {}, | |
496 TYPE => {} | |
497 ) ; | |
498 | |
499 my %oEntry = ( | |
500 mzmed => undef, | |
501 into => undef, | |
502 mzmin => undef, | |
503 mzmax => undef, | |
504 pcgroup => undef, | |
505 num_res => undef, | |
506 RECORDS => undef, | |
507 ) ; | |
508 | |
509 | |
510 my %oRecord = ( | |
511 id => undef, | |
512 exact_mz => undef, | |
513 score => undef, | |
514 formula => undef, | |
515 inchi => undef, | |
516 ms_type => undef, | |
517 precursor_type => undef, | |
518 instrument_type => undef, | |
519 name => undef, | |
520 peaks => undef, | |
521 ) ; | |
522 | |
523 | |
524 | |
525 foreach my $pc (@{$pcs}) { | |
526 | |
527 my $pc_res = {} ; | |
528 my $num_res = undef ; | |
529 | |
530 if ($pcgroups->{$pc}) { | |
531 my $pos = 0 ; | |
532 ## foreach mz of the pcgroup | |
533 foreach my $mz (@{ $pcgroups->{$pc}{mzmed} } ) { | |
534 | |
535 my %entry = %oEntry ; | |
536 ## | |
537 if ( defined $mz ) { $entry{mzmed} = $mz ; } | |
538 if ( $pcgroups->{$pc}{intervales}{$mz} ) { $entry{mzmin} = $pcgroups->{$pc}{intervales}{$mz}{min} ; } | |
539 if ( $pcgroups->{$pc}{intervales}{$mz} ) { $entry{mzmax} = $pcgroups->{$pc}{intervales}{$mz}{max} ; } | |
540 if ( $pcgroups->{$pc}{into}[$pos] ) { $entry{into} = $pcgroups->{$pc}{into}[$pos] ; } | |
541 if ( defined $pc ) { $entry{pcgroup} = $pc ; } | |
542 ## get RECORDS | |
543 if ( $pcgroups->{$pc}{enrich_annotation}{$mz} ) { | |
544 | |
545 my @recs = @{ $pcgroups->{$pc}{enrich_annotation}{$mz} } ; | |
546 $entry{num_res} = scalar(@recs) ; | |
547 | |
548 foreach my $recId (@recs) { | |
549 | |
550 my %record = %oRecord ; | |
551 if ( $records->{$recId} ) { $record{id} = $recId ; } | |
552 if ( $records->{$recId}{exact_mz} ) { $record{exact_mz} = $records->{$recId}{exact_mz} ; } | |
553 if ( $records->{$recId}{formula} ) { $record{formula} = $records->{$recId}{formula} ; } | |
554 if ( $records->{$recId}{ms_type} ) { $record{ms_type} = $records->{$recId}{ms_type} ; } | |
555 if ( $records->{$recId}{precursor_type} ) { $record{precursor_type} = $records->{$recId}{precursor_type} ; } | |
556 if ( $records->{$recId}{instrument_type} ) { $record{instrument_type} = $records->{$recId}{instrument_type} ; } | |
557 if ( $records->{$recId}{names} ) { $record{name} = $records->{$recId}{names}[0] ; } | |
558 if ( $records->{$recId}{inchi} ) { $record{inchi} = $records->{$recId}{inchi} ; } | |
559 ## peaks TODO... | |
560 | |
561 ## Score / BIG SHIT / | |
562 foreach my $record (@{ $pcgroups->{$pc}{'annotation'}{res} }) { | |
563 if ($record->{id} eq $recId ) { | |
564 $record{score} = $record->{score} ; | |
565 last ; | |
566 } | |
567 else { | |
568 next ; | |
569 } | |
570 } ## foreach record - - - for score | |
571 $entry{RECORDS}{$recId} = \%record ; | |
572 } ## foreach recId | |
573 } ## end IF | |
574 | |
575 $JSON{QUERY}{$mz} = \%entry ; | |
576 $pos ++ ; | |
577 } ## End FOREACH MZ | |
578 } | |
579 else { | |
580 warn "The pc group $pc doesn't exist in results !" ; | |
581 } | |
582 } | |
583 # print Dumper %JSON ; | |
584 return(\%JSON) ; | |
585 } | |
586 ## END of SUB | |
587 | |
588 | |
589 =head2 METHOD mapGroupsWithRecords | |
590 | |
591 ## Description : map records with pcgroups mz to adjust massbank id annotations | |
592 ## Input : $pcgroups, $records | |
593 ## Output : $pcgroups | |
594 ## Usage : my ( $var4 ) = mapGroupsWithRecords ( $$pcgroups, $records ) ; | |
595 | |
596 =cut | |
597 ## START of SUB | |
598 sub mapGroupsWithRecords { | |
599 ## Retrieve Values | |
600 my $self = shift ; | |
601 my ( $pcgroups, $records ) = @_; | |
602 | |
603 my %temp = () ; | |
604 my (%intervales, @annotation_ids) = ( (), () ) ; | |
605 | |
606 if ( ( defined $pcgroups ) and ( defined $records ) ) { | |
607 | |
608 %temp = %{$pcgroups} ; | |
609 my %unik_real_ids = () ; | |
610 my @real_ids = () ; | |
611 | |
612 foreach my $pc (keys %temp) { | |
613 | |
614 if ( $temp{$pc}{'intervales'} ) { %intervales = %{$temp{$pc}{'intervales'}} ; } | |
615 else { warn "Cant't find any intervale values\n" ; } | |
616 if ( $temp{$pc}{'massbank_ids'} ) { @annotation_ids = @{$temp{$pc}{'massbank_ids'}} ; } | |
617 else { warn "Cant't find any massbank id values\n" ; } | |
618 | |
619 # print Dumper %intervales; | |
620 # print Dumper @annotation_ids ; | |
621 | |
622 ## map with intervales | |
623 foreach my $mz (keys %intervales) { | |
624 my @filteredIds = () ; | |
625 my ( $min, $max ) = ( $intervales{$mz}{'min'}, $intervales{$mz}{'max'} ) ; | |
626 | |
627 foreach my $id (@annotation_ids) { | |
628 # print "Analyse mzs of id: $id...\n" ; | |
629 if ( (defined $id) and ( $records->{$id}) ) { | |
630 | |
631 my %currentRecord = %{$records->{$id}} ; | |
632 | |
633 if (scalar @{$currentRecord{'peaks'} } > 0 ) { | |
634 ## | |
635 foreach my $peak_mz (@{ $currentRecord{'peaks'} } ) { | |
636 if ($peak_mz) { | |
637 my $record_mz = $peak_mz->{'mz'} ; | |
638 if ( ($record_mz > $min ) and ($record_mz < $max) ){ | |
639 | |
640 if (!exists $unik_real_ids{$id}) { | |
641 $unik_real_ids{$id} = 1 ; | |
642 push (@filteredIds, $id) ; | |
643 # print "$mz - - $id\n" ; | |
644 } | |
645 | |
646 | |
647 } | |
648 else { | |
649 next ; | |
650 } | |
651 } | |
652 else { | |
653 warn "The mz field is not defined\n" ; | |
654 } | |
655 } ## foreach | |
656 } | |
657 else { | |
658 warn "The record ($id) has no peak\n" ; | |
659 } | |
660 } | |
661 else { | |
662 if (defined $id) { | |
663 warn "The id $id seems to be not present in getting records\n" ; | |
664 } | |
665 else { | |
666 warn "This catched id seems to be undef in getting records\n" ; | |
667 } | |
668 | |
669 next ; | |
670 } | |
671 } ## end foreach | |
672 ## to avoid multiple ids | |
673 # foreach my $id (keys %unik_real_ids) { | |
674 # push(@real_ids, $id) ; | |
675 # } | |
676 %unik_real_ids = () ; | |
677 # my @temp = @real_ids ; | |
678 my @temp = @filteredIds ; | |
679 $temp{$pc}{'enrich_annotation'}{$mz} = \@temp ; | |
680 @real_ids = () ; | |
681 @filteredIds = () ; | |
682 } ## End foreach mz | |
683 @annotation_ids = () ; | |
684 } ## End foreach pc | |
685 } | |
686 else { | |
687 warn"Can't find record or pcgroup data\n" ; | |
688 } | |
689 | |
690 return (\%temp) ; | |
691 } | |
692 ### END of SUB | |
693 | |
694 =head2 METHOD set_html_tbody_object | |
695 | |
696 ## Description : initializes and build the tbody object (perl array) needed to html template | |
697 ## Input : $nb_pages, $nb_items_per_page | |
698 ## Output : $tbody_object | |
699 ## Usage : my ( $tbody_object ) = set_html_tbody_object($nb_pages, $nb_items_per_page) ; | |
700 | |
701 =cut | |
702 ## START of SUB | |
703 sub set_html_tbody_object { | |
704 my $self = shift ; | |
705 my ( $nb_pages, $nb_items_per_page ) = @_ ; | |
706 | |
707 my ( @tbody_object ) = ( ) ; | |
708 | |
709 for ( my $i = 1 ; $i <= $nb_pages ; $i++ ) { | |
710 | |
711 my %pages = ( | |
712 # tbody feature | |
713 PAGE_NB => $i, | |
714 MASSES => [], ## end MASSES | |
715 ) ; ## end TBODY N | |
716 push (@tbody_object, \%pages) ; | |
717 } | |
718 return(\@tbody_object) ; | |
719 } | |
720 ## END of SUB | |
721 | |
722 =head2 METHOD add_mz_to_tbody_object | |
723 | |
724 ## Description : initializes and build the mz object (perl array) needed to html template | |
725 ## Input : $tbody_object, $nb_items_per_page, $mz_list | |
726 ## Output : $tbody_object | |
727 ## Usage : my ( $tbody_object ) = add_mz_to_tbody_object( $tbody_object, $nb_items_per_page, $mz_list ) ; | |
728 | |
729 =cut | |
730 ## START of SUB | |
731 sub add_mz_to_tbody_object { | |
732 my $self = shift ; | |
733 my ( $tbody_object, $nb_items_per_page, $mz_list, $json ) = @_ ; | |
734 | |
735 my ( $current_page, $mz_index ) = ( 0, 0 ) ; | |
736 | |
737 foreach my $page ( @{$tbody_object} ) { | |
738 | |
739 my @colors = ('white', 'green') ; | |
740 my ( $current_index, , $icolor ) = ( 0, 0 ) ; | |
741 | |
742 for ( my $i = 1 ; $i <= $nb_items_per_page ; $i++ ) { | |
743 # | |
744 if ( $current_index > $nb_items_per_page ) { ## manage exact mz per html page | |
745 $current_index = 0 ; | |
746 last ; ## | |
747 } | |
748 else { | |
749 $current_index++ ; | |
750 if ( $icolor > 1 ) { $icolor = 0 ; } | |
751 | |
752 if ( exists $mz_list->[$mz_index] ) { | |
753 | |
754 my %mz = ( | |
755 # mass feature | |
756 MASSES_ID_QUERY => "mz_0".sprintf("%04s", $mz_index+1 ) , | |
757 MASSES_MZ_QUERY => $mz_list->[$mz_index], | |
758 MASSES_PCGROUP_QUERY => $json->{QUERY}{ $mz_list->[$mz_index] }{pcgroup} , | |
759 MZ_COLOR => $colors[$icolor], | |
760 MASSES_NB => $mz_index+1, | |
761 ENTRIES => [] , | |
762 ) ; | |
763 push ( @{ $tbody_object->[$current_page]{MASSES} }, \%mz ) ; | |
764 # Html attr for mass | |
765 $icolor++ ; | |
766 } | |
767 } | |
768 $mz_index++ ; | |
769 } ## foreach mz | |
770 | |
771 $current_page++ ; | |
772 } | |
773 return($tbody_object) ; | |
774 } | |
775 ## END of SUB | |
776 | |
777 =head2 METHOD add_entries_to_tbody_object | |
778 | |
779 ## Description : initializes and build the entries object (perl array) needed to html template | |
780 ## Input : $tbody_object, $nb_items_per_page, $mz_list, $entries | |
781 ## Output : $tbody_object | |
782 ## Usage : my ( $tbody_object ) = add_entries_to_tbody_object( $tbody_object, $nb_items_per_page, $mz_list, $entries ) ; | |
783 | |
784 =cut | |
785 ## START of SUB | |
786 sub add_entries_to_tbody_object { | |
787 ## Retrieve Values | |
788 my $self = shift ; | |
789 my ( $tbody_object, $nb_items_per_page, $mz_list, $JSON ) = @_ ; | |
790 | |
791 my $index_page = 0 ; | |
792 my $index_mz_continous = 0 ; | |
793 | |
794 foreach my $page (@{$tbody_object}) { | |
795 | |
796 my $index_mz = 0 ; | |
797 | |
798 foreach my $mz (@{ $tbody_object->[$index_page]{MASSES} }) { | |
799 my $index_entry = 0 ; | |
800 my $check_noentry = 0 ; | |
801 my @toSort = () ; | |
802 | |
803 foreach my $record (keys %{ $JSON->{QUERY}{$mz->{MASSES_MZ_QUERY}}{RECORDS} }) { | |
804 $check_noentry ++ ; | |
805 | |
806 my %entry = ( | |
807 ENTRY_COLOR => $tbody_object->[$index_page]{MASSES}[$index_mz]{MZ_COLOR}, | |
808 ENTRY_ENTRY_NAME => $JSON->{QUERY}{$mz->{MASSES_MZ_QUERY}}{RECORDS}{$record}{name}, | |
809 ENTRY_ENTRY_ID => $JSON->{QUERY}{$mz->{MASSES_MZ_QUERY}}{RECORDS}{$record}{id}, | |
810 ENTRY_ENTRY_ID2 => $JSON->{QUERY}{$mz->{MASSES_MZ_QUERY}}{RECORDS}{$record}{id}, | |
811 ENTRY_FORMULA => $JSON->{QUERY}{$mz->{MASSES_MZ_QUERY}}{RECORDS}{$record}{formula}, | |
812 ENTRY_CPD_MZ => $JSON->{QUERY}{$mz->{MASSES_MZ_QUERY}}{RECORDS}{$record}{exact_mz}, | |
813 ENTRY_MS_TYPE => $JSON->{QUERY}{$mz->{MASSES_MZ_QUERY}}{RECORDS}{$record}{ms_type}, | |
814 ENTRY_PRECURSOR_TYPE => $JSON->{QUERY}{$mz->{MASSES_MZ_QUERY}}{RECORDS}{$record}{precursor_type}, | |
815 ENTRY_INSTRUMENT_TYPE => $JSON->{QUERY}{$mz->{MASSES_MZ_QUERY}}{RECORDS}{$record}{instrument_type}, | |
816 ENTRY_SCORE => $JSON->{QUERY}{$mz->{MASSES_MZ_QUERY}}{RECORDS}{$record}{score}, | |
817 ENTRY_ENTRY_INCHI => $JSON->{QUERY}{$mz->{MASSES_MZ_QUERY}}{RECORDS}{$record}{inchi}, | |
818 ) ; | |
819 push ( @{ $tbody_object->[$index_page]{MASSES}[$index_mz]{ENTRIES} }, \%entry) ; | |
820 | |
821 $index_entry++ ; | |
822 } ## end foreach record | |
823 if ($check_noentry == 0 ) { | |
824 my %entry = ( | |
825 ENTRY_COLOR => $tbody_object->[$index_page]{MASSES}[$index_mz]{MZ_COLOR}, | |
826 ENTRY_ENTRY_NAME => 'UNKNOWN', | |
827 ENTRY_ENTRY_ID => 'NONE', | |
828 ENTRY_ENTRY_ID2 => '', | |
829 ENTRY_FORMULA => 'n/a', | |
830 ENTRY_CPD_MZ => 'n/a', | |
831 ENTRY_MS_TYPE => 'n/a', | |
832 ENTRY_PRECURSOR_TYPE => 'n/a', | |
833 ENTRY_INSTRUMENT_TYPE => 'n/a', | |
834 ENTRY_SCORE => 0, | |
835 ENTRY_ENTRY_INCHI => 'n/a', | |
836 ) ; | |
837 push ( @{ $tbody_object->[$index_page]{MASSES}[$index_mz]{ENTRIES} }, \%entry) ; | |
838 } | |
839 | |
840 ## sorted by score | |
841 my @sorted = () ; | |
842 my @temp = @{ $tbody_object->[$index_page]{MASSES}[$index_mz]{ENTRIES} } ; | |
843 if (scalar (@temp) > 1 ) { ## for mz without record (only one entry with NA or 0 values) | |
844 @sorted = sort { $b->{ENTRY_SCORE} <=> $a->{ENTRY_SCORE} } @temp; | |
845 } | |
846 else { | |
847 @sorted = @temp; | |
848 } | |
849 | |
850 $tbody_object->[$index_page]{MASSES}[$index_mz]{ENTRIES} = \@sorted ; | |
851 | |
852 $index_mz ++ ; | |
853 $index_mz_continous ++ ; | |
854 | |
855 } ## End foreach mz | |
856 $index_page++ ; | |
857 | |
858 } ## End foreach page | |
859 # print Dumper $tbody_object ; | |
860 return($tbody_object) ; | |
861 } | |
862 ## END of SUB | |
863 | |
864 | |
865 | |
866 1 ; | |
867 | |
868 | |
869 __END__ | |
870 | |
871 =head1 SUPPORT | |
872 | |
873 You can find documentation for this module with the perldoc command. | |
874 | |
875 perldoc XXX.pm | |
876 | |
877 =head1 Exports | |
878 | |
879 =over 4 | |
880 | |
881 =item :ALL is ... | |
882 | |
883 =back | |
884 | |
885 =head1 AUTHOR | |
886 | |
887 Franck Giacomoni E<lt>franck.giacomoni@clermont.inra.frE<gt> | |
888 | |
889 =head1 LICENSE | |
890 | |
891 This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. | |
892 | |
893 =head1 VERSION | |
894 | |
895 version 1 : xx / xx / 201x | |
896 | |
897 version 2 : ?? | |
898 | |
899 =cut |