comparison lib/massbank_parser.pm @ 0:023c380900ef draft default tip

Init repository with last massbank_ws_searchspectrum master version
author fgiacomoni
date Wed, 19 Apr 2017 11:31:58 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:023c380900ef
1 package lib::massbank_parser ;
2
3 use strict;
4 use warnings ;
5 use Exporter ;
6 use Carp ;
7
8 use File::Basename;
9
10 use Data::Dumper ;
11
12 use vars qw($VERSION @ISA @EXPORT %EXPORT_TAGS);
13
14 our $VERSION = "1.0" ;
15 our @ISA = qw(Exporter) ;
16 our @EXPORT = qw( getChemNamesFromString getPeaksFromString ) ;
17 our %EXPORT_TAGS = ( ALL => [qw( getChemNamesFromString getPeaksFromString )] ) ;
18
19 =head1 NAME
20
21 parser::chem::massbank - An example module
22
23 =head1 SYNOPSIS
24
25 use parser::chem::massbank ;
26 my $object = parser::chem::massbank->new();
27 print $object->as_string;
28
29 =head1 DESCRIPTION
30
31 This module provides methods that parse MassBank records,
32 read either from a record file or from a record string
33 (instrument, chromatography, spectrum, peak and link fields).
34
35 =head1 METHODS
36
37 Methods are :
38
39 =head2 METHOD new
40
41 ## Description : new
42 ## Input : $self
43 ## Output : bless $self ;
44 ## Usage : new() ;
45
46 =cut
47
sub new {
    ## Constructor: builds an empty parser object.
    ## Input  : $class - class name (or an existing object) the method was invoked on ;
    ##          optional when called as a plain function.
    ## Output : $self  - a blessed, empty hash reference.
    my ( $class ) = @_ ;

    # Two-argument bless so that subclasses get objects of their own class.
    # Fall back to this package when no invocant was supplied, which is what
    # the former one-argument bless did in every case.
    $class = ref($class) || $class || __PACKAGE__ ;

    my $self = {} ;
    bless( $self, $class ) ;
    return $self ;
}
54 ### END of SUB
55
56 =head2 METHOD get_list_of_analysis_intrument_names
57
58 ## Description : permt de retourner la liste des nom uniques des instruments utilises
59 ## Input : $dir, $ms_files (a list of files)
60 ## Output : $names
61 ## Usage : my ( $names ) = get_list_of_analysis_intrument_names( $ms_files ) ;
62
63 =cut
64 ## START of SUB
sub get_list_of_analysis_intrument_names {
    ## Collects the distinct instrument names (AC$INSTRUMENT field) found in a
    ## list of MassBank record files, in order of first appearance.
    ## Input  : $dir      - directory holding the record files
    ##          $ms_files - array reference of file names relative to $dir
    ## Output : array reference of unique names ; each name is everything that
    ##          follows "AC$INSTRUMENT:" on the line, leading blank included.
    ## Croaks when a file path is undefined or does not exist.
    my $self = shift ;
    my ( $dir, $ms_files ) = @_ ;

    # Loaded locally: File::Spec is core but is not imported at file level.
    require File::Spec ;

    my %seen  = () ;
    my @names = () ;

    foreach my $ms_file ( @{$ms_files} ) {
        # Build the path with the platform separator instead of a hard-coded
        # backslash, which only produced a usable path on Windows.
        my $file = ( defined $dir and defined $ms_file )
            ? File::Spec->catfile( $dir, $ms_file )
            : undef ;

        unless ( defined $file and -e $file ) {
            croak "Can't work with a undef / none existing massbank file\n" ;
        }

        # Three-argument open with a lexical handle: the file name can no
        # longer be interpreted as an open mode, and no global handle is used.
        open( my $fh, '<', $file ) or die "Cant' read the file $file\n" ;
        while ( my $field = <$fh> ) {
            chomp $field ;
            next unless $field =~ /AC\$INSTRUMENT:(.*)/ ;
            my $name = $1 ;
            # A name already recorded ends the scan of the current file.
            last if $seen{$name} ;
            $seen{$name} = 1 ;
            push( @names, $name ) ;
        }
        close($fh) ;
    }
    return( \@names ) ;
}
89 ## END of SUB
90
91 =head2 METHOD get_analysis_instruments_data
92
93 ## Description : permet de recuperer tous les champs d'un object massbank
94 ## Input : $ms_file
95 ## Output : $features
96 ## Usage : my ( $features ) = get_analysis_instruments_data( $ms_file ) ;
97
98 =cut
99 ## START of SUB
sub get_analysis_instruments_data {
    ## Reads the instrument name and instrument type of a MassBank record file.
    ## Input  : $ms_file - path of the record file
    ## Output : hash reference with the keys 'name' (AC$INSTRUMENT) and
    ##          'type' (AC$INSTRUMENT_TYPE) ; a missing field stays undef, and
    ##          the hash is empty when neither field was found.
    ## Croaks when $ms_file is undefined or does not exist.
    my $self = shift ;
    my ( $ms_file ) = @_ ;

    unless ( defined $ms_file and -e $ms_file ) {
        croak "Can't work with a undef / none existing massbank file\n" ;
    }

    my $control  = 0 ;    # number of recognised lines
    my %features = (
        'name' => undef,
        'type' => undef,
    ) ;

    # Three-argument open with a lexical handle: the file name is never parsed
    # as an open mode and no package-global handle is shared between subs.
    open( my $fh, '<', $ms_file ) or die "Cant' read the file $ms_file\n" ;
    while ( my $field = <$fh> ) {
        chomp $field ;
        if    ( $field =~ /AC\$INSTRUMENT: (.*)/ )      { $features{'name'} = $1 ; $control++ ; }
        elsif ( $field =~ /AC\$INSTRUMENT_TYPE: (.*)/ ) { $features{'type'} = $1 ; $control++ ; }
    }
    close($fh) ;

    if ( $control == 0 ) { %features = () ; }
    return( \%features ) ;
}
126 ## END of SUB
127
128 =head2 METHOD get_ms_methods_data
129
130 ## Description : permet de recuperer tous les champs d'un object massbank
131 ## Input : $ms_file
132 ## Output : $features
133 ## Usage : my ( $features ) = get_ms_methods_data( $ms_file ) ;
134
135 =cut
136 ## START of SUB
sub get_ms_methods_data {
    ## Reads the AC$MASS_SPECTROMETRY subtags of a MassBank record file.
    ## Input  : $ms_file - path of the record file
    ## Output : hash reference keyed by ion_mode, ms_type, collision_energy,
    ##          collision_gas, desolvation_gas_flow, desolvation_temperature,
    ##          ionization_energy, laser, matrix, mass_accuracy, reagent_gas and
    ##          scanning. Each value is the text following the subtag (leading
    ##          blank included) ; absent subtags stay undef. The hash is empty
    ##          when no subtag at all was found.
    ## Croaks when $ms_file is undefined or does not exist.
    my $self = shift ;
    my ( $ms_file ) = @_ ;

    unless ( defined $ms_file and -e $ms_file ) {
        croak "Can't work with a undef / none existing massbank file\n" ;
    }

    # Ordered [ subtag pattern, result key ] pairs. ION_MODE and MS_TYPE used
    # to require a trailing colon, which MassBank records do not write
    # ("AC$MASS_SPECTROMETRY: MS_TYPE MS2"), so these two mandatory fields
    # never matched. The colon is now optional: lines written with it still
    # yield the same captured value as before.
    my @subtags = (
        [ 'ION_MODE:?',              'ion_mode' ],                   # mandatory
        [ 'MS_TYPE:?',               'ms_type' ],                    # mandatory
        [ 'COLLISION_ENERGY',        'collision_energy' ],           # optional
        [ 'COLLISION_GAS',           'collision_gas' ],              # optional
        [ 'DESOLVATION_GAS_FLOW',    'desolvation_gas_flow' ],       # optional
        [ 'DESOLVATION_TEMPERATURE', 'desolvation_temperature' ],    # optional
        [ 'IONIZATION_ENERGY',       'ionization_energy' ],          # optional
        [ 'LASER',                   'laser' ],                      # optional
        [ 'MATRIX',                  'matrix' ],                     # optional
        [ 'MASS_ACCURACY',           'mass_accuracy' ],              # optional
        [ 'REAGENT_GAS',             'reagent_gas' ],                # optional
        [ 'SCANNING',                'scanning' ],                   # optional
    ) ;

    my %features = () ;
    my @matchers = () ;
    foreach my $subtag (@subtags) {
        my ( $pattern, $key ) = @{$subtag} ;
        $features{$key} = undef ;
        push( @matchers, [ qr/AC\$MASS_SPECTROMETRY: ${pattern}(.*)/, $key ] ) ;
    }

    my $control = 0 ;    # number of recognised lines
    # Three-argument open with a lexical handle (no mode injection through the
    # file name, no shared global handle).
    open( my $fh, '<', $ms_file ) or die "Cant' read the file $ms_file\n" ;
    while ( my $field = <$fh> ) {
        chomp $field ;
        foreach my $matcher (@matchers) {
            my ( $regex, $key ) = @{$matcher} ;
            if ( $field =~ $regex ) {
                $features{$key} = $1 ;
                $control++ ;
                last ;    # a line carries a single subtag
            }
        }
    }
    close($fh) ;

    ## empty the structure when nothing was found
    if ( $control == 0 ) { %features = () ; }
    return( \%features ) ;
}
184 ## END of SUB
185
186 =head2 METHOD get_solvents_data
187
188 ## Description : permet de recuperer tous les champs d'un object massbank
189 ## Input : $ms_file
190 ## Output : $features
191 ## Usage : my ( $features ) = get_solvents_data( $ms_file ) ;
192
193 =cut
194 ## START of SUB
sub get_solvents_data {
    ## Lists the solvent descriptions of a MassBank record file.
    ## Input  : $ms_file - path of the record file
    ## Output : array reference with one entry per "AC$CHROMATOGRAPHY: SOLVENT"
    ##          line, built as 'Solvent ' followed by the rest of that line.
    ## Croaks when $ms_file is undefined or does not exist.
    my $self = shift ;
    my ( $ms_file ) = @_ ;

    unless ( defined $ms_file and -e $ms_file ) {
        croak "Can't work with a undef / none existing massbank file\n" ;
    }

    my @features = () ;
    # Three-argument open with a lexical handle: the file name is never parsed
    # as an open mode and no package-global handle is shared between subs.
    open( my $fh, '<', $ms_file ) or die "Cant' read the file $ms_file\n" ;
    while ( my $field = <$fh> ) {
        chomp $field ;
        if ( $field =~ /AC\$CHROMATOGRAPHY: SOLVENT(.*)/ ) {
            push( @features, 'Solvent '.$1 ) ;
        }
    }
    close($fh) ;

    return( \@features ) ;
}
215 ## END of SUB
216
217 =head2 METHOD get_sample_data
218
219 ## Description : permet de recuperer tous les champs d'un object massbank
220 ## Input : $ms_file
221 ## Output : $features
222 ## Usage : my ( $features ) = get_sample_data( $ms_file ) ;
223
224 =cut
225 ## START of SUB
sub get_sample_data {
    ## Reads the sample description of a MassBank record file.
    ## Input  : $ms_file - path of the record file
    ## Output : hash reference with the single key 'sample_type', holding the
    ##          text that follows "SP$SAMPLE" on the last matching line ;
    ##          empty hash when the file has no such line.
    ## Croaks when $ms_file is undefined or does not exist.
    my $self = shift ;
    my ( $ms_file ) = @_ ;

    unless ( defined $ms_file and -e $ms_file ) {
        croak "Can't work with a undef / none existing massbank file\n" ;
    }

    my $control  = 0 ;    # number of recognised lines
    my %features = (
        'sample_type' => undef,
    ) ;

    # Three-argument open with a lexical handle: the file name is never parsed
    # as an open mode and no package-global handle is shared between subs.
    open( my $fh, '<', $ms_file ) or die "Cant' read the file $ms_file\n" ;
    while ( my $field = <$fh> ) {
        chomp $field ;
        if ( $field =~ /SP\$SAMPLE(.*)/ ) {
            $features{'sample_type'} = $1 ;
            $control++ ;
        }
    }
    close($fh) ;

    if ( $control == 0 ) { %features = () ; }
    return( \%features ) ;
}
250 ## END of SUB
251
252 =head2 METHOD get_chromato_methods_data
253
254 ## Description : permet de recuperer tous les champs d'un object massbank
255 ## Input : $ms_file
256 ## Output : $features
257 ## Usage : my ( $features ) = get_chromato_methods_data( $ms_file ) ;
258
259 =cut
260 ## START of SUB
sub get_chromato_methods_data {
    ## Reads the AC$CHROMATOGRAPHY subtags of a MassBank record file.
    ## Input  : $ms_file - path of the record file
    ## Output : hash reference keyed by capillary_voltage, column_name,
    ##          column_temperature, flow_gradient, flow_rate and retention_time ;
    ##          absent subtags stay undef, and the hash is empty when no subtag
    ##          at all was found.
    ## Croaks when $ms_file is undefined or does not exist.
    my $self = shift ;
    my ( $ms_file ) = @_ ;

    unless ( defined $ms_file and -e $ms_file ) {
        croak "Can't work with a undef / none existing massbank file\n" ;
    }

    # Recognised subtags ; the result key is the lower-cased subtag.
    my @subtags = qw(
        CAPILLARY_VOLTAGE COLUMN_NAME COLUMN_TEMPERATURE
        FLOW_GRADIENT FLOW_RATE RETENTION_TIME
    ) ;
    my %features    = map { lc($_) => undef } @subtags ;
    my $alternation = join( '|', @subtags ) ;
    my $line_regex  = qr/AC\$CHROMATOGRAPHY: ($alternation) (.*)/ ;

    my $control = 0 ;    # number of recognised lines
    # Three-argument open with a lexical handle: the file name is never parsed
    # as an open mode and no package-global handle is shared between subs.
    open( my $fh, '<', $ms_file ) or die "Cant' read the file $ms_file\n" ;
    while ( my $field = <$fh> ) {
        chomp $field ;
        if ( $field =~ $line_regex ) {
            $features{ lc($1) } = $2 ;
            $control++ ;
        }
    }
    close($fh) ;

    if ( $control == 0 ) { %features = () ; }
    return( \%features ) ;
}
296 ## END of SUB
297
298 =head2 METHOD get_analytical_conditions_data
299
300 ## Description : permet de recuperer tous les champs d'un object massbank .. for massbank version < 2.0
301 ## Input : $ms_file
302 ## Output : $features
303 ## Usage : my ( $features ) = get_analytical_conditions_data( $ms_file ) ;
304
305 =cut
306 ## START of SUB
sub get_analytical_conditions_data {
    ## Reads the AC$ANALYTICAL_CONDITION subtags of a MassBank record file
    ## (record format prior to version 2.0) and sorts them into chromatography
    ## and mass-spectrometry features.
    ## Input  : $ms_file - path of the record file
    ## Output : list of two hash references, ( \%features_chrom, \%features_ms ).
    ##          Keys are the lower-cased subtags ; absent subtags stay undef and
    ##          a hash is empty when none of its subtags was found.
    ## Croaks when $ms_file is undefined or does not exist.
    my $self = shift ;
    my ( $ms_file ) = @_ ;

    unless ( defined $ms_file and -e $ms_file ) {
        croak "Can't work with a undef / none existing massbank file\n" ;
    }

    ## chromatographic method subtags
    my @chrom_tags = qw(
        CAPILLARY_VOLTAGE COLUMN_NAME COLUMN_TEMPERATURE
        FLOW_GRADIENT FLOW_RATE RETENTION_TIME
    ) ;
    ## mass spectrometry method subtags (ION_MODE and MS_TYPE are mandatory)
    my @ms_tags = qw(
        ION_MODE MS_TYPE COLLISION_ENERGY COLLISION_GAS DESOLVATION_GAS_FLOW
        DESOLVATION_TEMPERATURE IONIZATION_ENERGY LASER MATRIX MASS_ACCURACY
        REAGENT_GAS SCANNING
    ) ;

    my %features_chrom = map { lc($_) => undef } @chrom_tags ;
    my %features_ms    = map { lc($_) => undef } @ms_tags ;
    my %is_chrom       = map { $_ => 1 } @chrom_tags ;

    # Every subtag is matched as "TAG value" with the separating blank outside
    # the capture. COLUMN_TEMPERATURE used to capture that blank as part of the
    # value, unlike all the other fields.
    my $alternation = join( '|', @chrom_tags, @ms_tags ) ;
    my $line_regex  = qr/AC\$ANALYTICAL_CONDITION: ($alternation) (.*)/ ;

    my ( $control_chrom, $control_ms ) = ( 0, 0 ) ;
    # Three-argument open with a lexical handle (no mode injection through the
    # file name, no shared global handle).
    open( my $fh, '<', $ms_file ) or die "Cant' read the file $ms_file\n" ;
    while ( my $field = <$fh> ) {
        chomp $field ;
        next unless $field =~ $line_regex ;
        my ( $tag, $value ) = ( $1, $2 ) ;
        if ( $is_chrom{$tag} ) {
            $features_chrom{ lc($tag) } = $value ;
            $control_chrom++ ;
        }
        else {
            $features_ms{ lc($tag) } = $value ;
            $control_ms++ ;
        }
    }
    close($fh) ;

    if ( $control_ms == 0 )    { %features_ms    = () ; }
    if ( $control_chrom == 0 ) { %features_chrom = () ; }
    return( \%features_chrom, \%features_ms ) ;
}
371 ## END of SUB
372
373 =head2 METHOD get_spectrums_data
374
375 ## Description : permet de recuperer tous les champs d'un object massbank
376 ## Input : $ms_file
377 ## Output : $features
378 ## Usage : my ( $features ) = get_spectrums_data( $ms_file ) ;
379
380 =cut
381 ## START of SUB
sub get_spectrums_data {
    ## Reads the focused-ion and peak-count fields of a MassBank record file.
    ## Input  : $ms_file - path of the record file
    ## Output : hash reference with the keys 'ion_type', 'precursor_mz',
    ##          'precursor_type' (text following the MS$FOCUSED_ION subtag,
    ##          leading blank included) and 'num_peaks' (PK$NUM_PEAK value).
    ##          Absent fields stay undef ; the hash is empty when none was found.
    ## Croaks when $ms_file is undefined or does not exist.
    my $self = shift ;
    my ( $ms_file ) = @_ ;

    unless ( defined $ms_file and -e $ms_file ) {
        croak "Can't work with a undef / none existing massbank file\n" ;
    }

    my $control  = 0 ;    # number of recognised lines
    my %features = (
        'ion_type'       => undef,
        'precursor_mz'   => undef,
        'precursor_type' => undef,
        'num_peaks'      => undef,
    ) ;

    # Three-argument open with a lexical handle: the file name is never parsed
    # as an open mode and no package-global handle is shared between subs.
    open( my $fh, '<', $ms_file ) or die "Cant' read the file $ms_file\n" ;
    while ( my $field = <$fh> ) {
        chomp $field ;
        if    ( $field =~ /MS\$FOCUSED_ION: ION_TYPE(.*)/ )       { $features{'ion_type'}       = $1 ; $control++ ; }
        elsif ( $field =~ /MS\$FOCUSED_ION: PRECURSOR_M\/Z(.*)/ ) { $features{'precursor_mz'}   = $1 ; $control++ ; }
        elsif ( $field =~ /MS\$FOCUSED_ION: PRECURSOR_TYPE(.*)/ ) { $features{'precursor_type'} = $1 ; $control++ ; }
        elsif ( $field =~ /PK\$NUM_PEAK: (.*)/ )                  { $features{'num_peaks'}      = $1 ; $control++ ; }
    }
    close($fh) ;

    if ( $control == 0 ) { %features = () ; }
    return( \%features ) ;
}
412 ## END of SUB
413
414 =head2 METHOD get_peaks_data
415
416 ## Description : permet de recuperer tous les champs d'un object massbank
417 ## Input : $ms_file
418 ## Output : $features
419 ## Usage : my ( $features ) = get_peaks_data( $ms_file ) ;
420
421 =cut
422 ## START of SUB
sub get_peaks_data {
    ## Reads the peak table (PK$PEAK section) of a MassBank record file.
    ## Input  : $ms_file - path of the record file
    ## Output : array reference of hash references, one per peak line, with the
    ##          keys 'mz', 'intensity' and 'relative_intensity'.
    ## Croaks when $ms_file is undefined or does not exist.
    my $self = shift ;
    my ( $ms_file ) = @_ ;

    unless ( defined $ms_file and -e $ms_file ) {
        croak "Can't work with a undef / none existing massbank file\n" ;
    }

    # One pattern for a peak line: blank(s), m/z (integer or decimal), blank(s),
    # intensity (integer or decimal, optionally in exponent notation such as
    # 1.2e5), blank(s), relative intensity (integer). It replaces the four
    # integer/decimal combinations that were tested one after the other, and
    # accepts exponent intensities as getPeaksFromString does.
    my $peak_line = qr/\s+(\d+(?:\.\d+)?)\s+(\d+(?:\.\d+)?(?:[eE][+-]?\d+)?)\s+(\d+)/ ;

    my @features = () ;
    my $in_peaks = 0 ;    # becomes 1 once the PK$PEAK header has been read

    # Three-argument open with a lexical handle (no mode injection through the
    # file name, no shared global handle).
    open( my $fh, '<', $ms_file ) or die "Cant' read the file $ms_file\n" ;
    while ( my $field = <$fh> ) {
        chomp $field ;
        if ( $field =~ /PK\$PEAK: m\/z int\. rel\.int\./ ) {
            $in_peaks = 1 ;
            next ;
        }
        next unless $in_peaks ;
        next unless $field =~ $peak_line ;

        my ( $mz, $intensity, $relative ) = ( $1, $2, $3 ) ;
        # Expand exponent notation to a plain number ; other values are kept
        # exactly as written in the record.
        $intensity = $intensity + 0 if $intensity =~ /[eE]/ ;
        push( @features, { 'mz' => $mz, 'intensity' => $intensity, 'relative_intensity' => $relative } ) ;
    }
    close($fh) ;

    return( \@features ) ;
}
466 ## END of SUB
467
468 =head2 METHOD getPeaksFromString
469
470 ## Description : permet de recuperer la data peaks d'un record handler massbank
471 ## Input : $record
472 ## Output : $features
473 ## Usage : my ( $features ) = getPeaksFromString( $record ) ;
474
475 =cut
476 ## START of SUB
sub getPeaksFromString {
    ## Extracts the peak table (PK$PEAK section) from a MassBank record string.
    ## Input  : $record - whole record as a single newline-separated string
    ## Output : array reference of hash references, one per peak line, with the
    ##          keys 'mz', 'intensity' and 'relative_intensity'.
    ## Croaks when $record is undefined.
    my $self = shift ;
    my ( $record ) = @_ ;

    unless ( defined $record ) {
        croak "Can't work with a undef / none existing massbank handler\n" ;
    }

    # One pattern for a peak line: blank(s), m/z (integer or decimal), blank(s),
    # intensity (integer or decimal, optionally in exponent notation), blank(s),
    # relative intensity (integer). It replaces the five patterns that were
    # tested one after the other.
    my $peak_line = qr/\s+(\d+(?:\.\d+)?)\s+(\d+(?:\.\d+)?(?:[eE][+-]?\d+)?)\s+(\d+)/ ;

    my @features = () ;
    my $in_peaks = 0 ;    # becomes 1 once the PK$PEAK header has been read

    foreach my $field ( split( /\n/, $record ) ) {
        if ( $field =~ /PK\$PEAK: m\/z int\. rel\.int\./ ) {
            $in_peaks = 1 ;
            next ;
        }
        next unless $in_peaks ;
        next unless $field =~ $peak_line ;

        my ( $mz, $intensity, $relative ) = ( $1, $2, $3 ) ;
        # Exponent notation is converted by plain numification (1.5e3 => 1500).
        # The former code computed mantissa * (10 * exponent), i.e. 45 for
        # 1.5e3, and only accepted a single unsigned exponent digit.
        $intensity = $intensity + 0 if $intensity =~ /[eE]/ ;
        push( @features, { 'mz' => $mz, 'intensity' => $intensity, 'relative_intensity' => $relative } ) ;
    }

    return( \@features ) ;
}
523 ## END of SUB
524
525 =head2 METHOD getIdFromString
526
527 ## Description : get the accession id of massbank record
528 ## Input : $record
529 ## Output : $id
530 ## Usage : my ( $id ) = getIdFromString ( $record ) ;
531
532 =cut
533 ## START of SUB
sub getIdFromString {
    ## Returns the accession id of a MassBank record string: the text that
    ## follows "ACCESSION:" and its blanks. When several lines match, the last
    ## one wins ; undef when none matches. Croaks when $record is undefined.
    my ( $self, $record ) = @_ ;

    croak "Can't work with a undef / none existing massbank handler\n"
        unless defined $record ;

    my $accession ;
    for my $line ( split( /\n/, $record ) ) {
        $accession = $1 if $line =~ /ACCESSION:\s+(.+)/ ;
    }

    return $accession ;
}
555 ### END of SUB
556
557
558
559 =head2 METHOD getInstrumentTypeFromString
560
561 ## Description : get the instrument type of massbank record
562 ## Input : $record
563 ## Output : $instrumentType
564 ## Usage : my ( $instrumentType ) = getInstrumentTypeFromString ( $record ) ;
565
566 =cut
567 ## START of SUB
sub getInstrumentTypeFromString {
    ## Returns the instrument type of a MassBank record string: the text that
    ## follows "INSTRUMENT_TYPE:" and its blanks, taken from the last matching
    ## line ; undef when no line matches. Croaks when $record is undefined.
    my ( $self, $record ) = @_ ;

    if ( !defined $record ) {
        croak "Can't work with a undef / none existing massbank handler\n" ;
    }

    # Collect the captured value of every matching line, keep the last one.
    my @types = map { /INSTRUMENT_TYPE:\s+(.+)/ ? $1 : () } split( /\n/, $record ) ;
    my $instrumentType = @types ? $types[-1] : undef ;

    return $instrumentType ;
}
589 ### END of SUB
590
591 =head2 METHOD getFormulaFromString
592
593 ## Description : get the elementar formula of massbank record
594 ## Input : $record
595 ## Output : $formula
596 ## Usage : my ( $formula ) = getFormulaFromString ( $record ) ;
597
598 =cut
599 ## START of SUB
sub getFormulaFromString {
    ## Returns the elemental formula of a MassBank record string: the text that
    ## follows "CH$FORMULA:" and its blanks, taken from the last matching line ;
    ## undef when no line matches. Croaks when $record is undefined.
    my ( $self, $record ) = @_ ;

    defined $record
        or croak "Can't work with a undef / none existing massbank handler\n" ;

    my $formula ;
    # Walk the lines backwards: the first hit is the last match of the record.
    for my $line ( reverse split( /\n/, $record ) ) {
        if ( $line =~ /CH\$FORMULA:\s+(.+)/ ) {
            $formula = $1 ;
            last ;
        }
    }

    return $formula ;
}
621 ### END of SUB
622
623 =head2 METHOD getInchiFromString
624
625 ## Description : get the IUPAC InCHi of massbank record
626 ## Input : $record
627 ## Output : $inchi
628 ## Usage : my ( $inchi ) = getInchiFromString ( $record ) ;
629
630 =cut
631 ## START of SUB
sub getInchiFromString {
    ## Returns the IUPAC InChI of a MassBank record string: the text that
    ## follows "CH$IUPAC:" and its blanks, taken from the last matching line ;
    ## undef when no line matches. Croaks when $record is undefined.
    my ( $self, $record ) = @_ ;

    unless ( defined $record ) {
        croak "Can't work with a undef / none existing massbank handler\n" ;
    }

    my $inchi ;
    my @lines = split( /\n/, $record ) ;
    for my $index ( 0 .. $#lines ) {
        next unless $lines[$index] =~ /CH\$IUPAC:\s+(.+)/ ;
        $inchi = $1 ;
    }

    return $inchi ;
}
653 ### END of SUB
654
655 =head2 METHOD getExactMzFromString
656
657 ## Description : get the exact mass of massbank record
658 ## Input : $record
659 ## Output : $exactMass
660 ## Usage : my ( $exactMass ) = getExactMzFromString ( $record ) ;
661
662 =cut
663 ## START of SUB
sub getExactMzFromString {
    ## Returns the exact mass of a MassBank record string: the text that
    ## follows "CH$EXACT_MASS:" and its blanks, taken from the last matching
    ## line ; undef when no line matches. Croaks when $record is undefined.
    my ( $self, $record ) = @_ ;

    croak "Can't work with a undef / none existing massbank handler\n"
        if !defined $record ;

    my $exactMass = undef ;
    foreach my $line ( split( /\n/, $record ) ) {
        my ( $value ) = $line =~ /CH\$EXACT_MASS:\s+(.+)/ ;
        $exactMass = $value if defined $value ;
    }

    return $exactMass ;
}
685 ### END of SUB
686
687
688 =head2 METHOD getPrecursorTypeFromString
689
690 ## Description : get the precursor type of massbank record
691 ## Input : $record
692 ## Output : $precursorType
693 ## Usage : my ( $precursorType ) = getPrecursorTypeFromString ( $record ) ;
694
695 =cut
696 ## START of SUB
sub getPrecursorTypeFromString {
    ## Returns the precursor type of a MassBank record string.
    ## Input  : $record - whole record as a single newline-separated string
    ## Output : the first available value, in this order of preference:
    ##            1. text following "PRECURSOR_TYPE" (first such line),
    ##            2. text following "ION_TYPE" (first such line),
    ##            3. last ';'-separated item of RECORD_TITLE, blanks removed,
    ##            4. the string 'NA'.
    ## Croaks when $record is undefined.
    my $self = shift ;
    my ( $record ) = @_ ;

    unless ( defined $record ) {
        croak "Can't work with a undef / none existing massbank handler\n" ;
    }

    my ( $precursorType_first, $ionType_first, $precursorType_optionnal ) ;

    foreach my $field ( split( /\n/, $record ) ) {
        if ( $field =~ /RECORD_TITLE:\s+(.+)/ ) {
            my @title = split( /;/, $1 ) ;
            # A title made only of ';' splits to nothing: keep undef rather
            # than running a substitution on an undefined value.
            if (@title) {
                $precursorType_optionnal = $title[-1] ;
                $precursorType_optionnal =~ s/\s//g ;
            }
        }
        if ( $field =~ /PRECURSOR_TYPE(.+)/ ) {
            $precursorType_first = $1 ;
            last ;    # best source found, nothing else can override it
        }
        # Remember the first ION_TYPE but keep scanning. Stopping here, as the
        # code formerly did, hid any PRECURSOR_TYPE line located further down,
        # although PRECURSOR_TYPE is the preferred source.
        if ( ( !defined $ionType_first ) and ( $field =~ /ION_TYPE(.+)/ ) ) {
            $ionType_first = $1 ;
        }
    }

    ## manage undef precursor/ion type field
    my $precursorType = 'NA' ;
    if    ( defined $precursorType_first )     { $precursorType = $precursorType_first ; }
    elsif ( defined $ionType_first )           { $precursorType = $ionType_first ; }
    elsif ( defined $precursorType_optionnal ) { $precursorType = $precursorType_optionnal ; }

    return ($precursorType) ;
}
750 ### END of SUB
751
752 =head2 METHOD getMsTypeFromString
753
754 ## Description : get the MS type of massbank record
755 ## Input : $record
756 ## Output : $msType
757 ## Usage : my ( $msType ) = getMsTypeFromString ( $record ) ;
758
759 =cut
760 ## START of SUB
sub getMsTypeFromString {
    ## Returns the MS type of a MassBank record string: the text that follows
    ## "AC$MASS_SPECTROMETRY: MS_TYPE" and its blanks, taken from the last
    ## matching line ; undef when no line matches.
    ## Croaks when $record is undefined.
    my ( $self, $record ) = @_ ;

    if ( !defined $record ) {
        croak "Can't work with a undef / none existing massbank handler\n" ;
    }

    my $msType ;
    my @lines = split( /\n/, $record ) ;
    while ( defined( my $line = shift @lines ) ) {
        $msType = $1 if $line =~ /AC\$MASS_SPECTROMETRY:\s+MS_TYPE\s+(.+)/ ;
    }

    return $msType ;
}
782 ### END of SUB
783
784 =head2 METHOD getChemNamesFromString
785
786 ## Description : get list of names of a massbank record
787 ## Input : $record
788 ## Output : $names
789 ## Usage : my ( $names ) = getChemNamesFromString( $record ) ;
790
791 =cut
792 ## START of SUB
sub getChemNamesFromString {
    ## Lists the chemical names of a MassBank record string: one entry per
    ## "CH$NAME: " line, in record order, holding the text after the tag.
    ## Returns an array reference (empty when no name is present).
    ## Croaks when $record is undefined.
    my ( $self, $record ) = @_ ;

    croak "Can't work with a undef / none existing massbank record (string)\n"
        unless defined $record ;

    my @names = map { /CH\$NAME: (.*)/ ? $1 : () } split( /\n/, $record ) ;

    return \@names ;
}
812 ## END of SUB
813
814
815
816
817
818 =head2 METHOD getMassBankHandler
819
820 ## Description : get a massbank handler from a file
821 ## Input : $record
822 ## Output : $massbankHandler
823 ## Usage : my ( $massbankHandler ) = getMassBankHandler ( $record ) ;
824
825 =cut
826 ## START of SUB
sub getMassBankHandler {
    ## Placeholder: meant to build a MassBank handler from $record, but the
    ## body is not implemented yet and always returns undef.
    my ( $self, $record ) = @_ ;

    ## TODO...
    my $massbankHandler = undef ;

    return $massbankHandler ;
}
837 ### END of SUB
838
839 =head2 METHOD get_annotations_data
840
841 ## Description : permet de recuperer tous les champs d'un object massbank
842 ## Input : $ms_file
843 ## Output : $features
844 ## Usage : my ( $features ) = get_annotations_data( $ms_file ) ;
845
846 =cut
847 ## START of SUB
sub get_annotations_data {
    ## Lists the peak annotation headers of a MassBank record file.
    ## Input  : $ms_file - path of the record file
    ## Output : array reference with, for every "PK$ANNOTATION:" line, the text
    ##          that follows the colon (leading blank included).
    ## Croaks when $ms_file is undefined or does not exist.
    my $self = shift ;
    my ( $ms_file ) = @_ ;

    unless ( defined $ms_file and -e $ms_file ) {
        croak "Can't work with a undef / none existing massbank file\n" ;
    }

    my @features = () ;
    # Three-argument open with a lexical handle: the file name is never parsed
    # as an open mode and no package-global handle is shared between subs.
    open( my $fh, '<', $ms_file ) or die "Cant' read the file $ms_file\n" ;
    while ( my $field = <$fh> ) {
        chomp $field ;
        push( @features, $1 ) if $field =~ /PK\$ANNOTATION:(.*)/ ;
    }
    close($fh) ;

    return( \@features ) ;
}
869 ## END of SUB
870
871 =head2 METHOD get_links_data
872
873 ## Description : permet de recuperer tous les champs d'un object massbank
874 ## Input : $ms_file
875 ## Output : $features
876 ## Usage : my ( $features ) = get_links_data( $ms_file ) ;
877
878 =cut
879 ## START of SUB
sub get_links_data {
    ## Collects the external database links (CH$LINK fields) of a MassBank
    ## record file.
    ## Input  : $ms_file - path of the record file
    ## Output : hash reference with the keys 'CAS', 'KEGG' and 'PUBCHEM', each
    ##          an array reference of the identifiers found (possibly empty).
    ##          Other link types are ignored.
    ## Croaks when $ms_file is undefined or does not exist.
    my $self = shift ;
    my ( $ms_file ) = @_ ;

    unless ( defined $ms_file and -e $ms_file ) {
        croak "Can't work with a undef / none existing massbank file\n" ;
    }

    my ( @CAS, @KEGG, @PUBCHEM ) ;

    # Three-argument open with a lexical handle: the file name is never parsed
    # as an open mode and no package-global handle is shared between subs.
    open( my $fh, '<', $ms_file ) or die "Cant' read the file $ms_file\n" ;
    while ( my $field = <$fh> ) {
        chomp $field ;
        if    ( $field =~ /CH\$LINK: CAS (.*)/ )         { push( @CAS,     $1 ) ; }
        elsif ( $field =~ /CH\$LINK: KEGG (.*)/ )        { push( @KEGG,    $1 ) ; }
        elsif ( $field =~ /CH\$LINK: PUBCHEM CID (.*)/ ) { push( @PUBCHEM, $1 ) ; }
    }
    close($fh) ;

    my %features = (
        'CAS'     => \@CAS,
        'KEGG'    => \@KEGG,
        'PUBCHEM' => \@PUBCHEM,
    ) ;

    return( \%features ) ;
}
914 ## END of SUB
915
916 =head2 METHOD get_ms_record_links_data
917
918 ## Description : permet de recuperer tous les champs d'un object massbank
919 ## Input : $ms_file
920 ## Output : $features
921 ## Usage : my ( $features ) = get_ms_record_links_data( $ms_file ) ;
922
923 =cut
924 ## START of SUB
sub get_ms_record_links_data {
    ## Derives the record accession and its source database from a record
    ## file name (the file itself is not read).
    ## Input  : $ms_file - record file name or path, with or without ".txt"
    ## Output : hash reference with 'accession' (trailing word characters of
    ##          the base name, e.g. 0001-PS0002 => PS0002) and 'name'
    ##          ('RESPECT', 'MASSBANK', or undef when the accession prefix is
    ##          unknown). Empty hash when no accession could be extracted.
    ## Warns through carp when the prefix belongs to neither source.
    my ( $self, $ms_file ) = @_ ;

    ## Internal reference for MASSBANK and RESPECT: accession prefix => source
    my %source_for = (
        ( map { $_ => 'RESPECT' } qw( PS PT PM ) ),
        ( map { $_ => 'MASSBANK' } qw(
            TUE GLS AU MSJ ML FIO UF CO UO TT OUF MCH NU KNA MT CE KO KZ
            JEL JP PR BML CA TY PB FU EA UT BSU WA
        ) ),
    ) ;

    return {} unless $ms_file ;

    my $filename = basename( "$ms_file", ".txt" ) ;
    return {} unless $filename =~ /(\w+)$/ ;

    my %db = ( 'accession' => $1, 'name' => undef ) ;

    # The leading non-digit run of the accession identifies the contributor.
    if ( $db{'accession'} =~ /(\D+)(\d+)/ ) {
        my $key = $1 ;
        if ( exists $source_for{$key} ) {
            $db{'name'} = $source_for{$key} ;
        }
        else {
            carp "The following key ($key) for $db{'accession'} has an unknown reference (not a Massbank or ReSpect source)\n" ;
        }
    }

    return( \%db ) ;
}
956 ## END of SUB
957
958
959 1 ;
960
961
962 __END__
963
964 =head1 SUPPORT
965
966 You can find documentation for this module with the perldoc command.
967
968 perldoc parser::chem::massbank.pm
969
970 =head1 Exports
971
972 =over 4
973
974 =item :ALL is ...
975
976 =back
977
978 =head1 AUTHOR
979
980 Franck Giacomoni E<lt>franck.giacomoni@clermont.inra.frE<gt>
981
982 =head1 LICENSE
983
984 This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
985
986 =head1 VERSION
987
988 version 1 : 25 / 06 / 2013
989
990 version 2 : ??
991
992 =cut