Mercurial > repos > fgiacomoni > massbank_ws_searchspectrum
comparison lib/massbank_parser.pm @ 0:023c380900ef draft default tip
Init repository with last massbank_ws_searchspectrum master version
author | fgiacomoni |
---|---|
date | Wed, 19 Apr 2017 11:31:58 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:023c380900ef |
---|---|
1 package lib::massbank_parser ; | |
2 | |
3 use strict; | |
4 use warnings ; | |
5 use Exporter ; | |
6 use Carp ; | |
7 | |
8 use File::Basename; | |
9 | |
10 use Data::Dumper ; | |
11 | |
12 use vars qw($VERSION @ISA @EXPORT %EXPORT_TAGS); | |
13 | |
14 our $VERSION = "1.0" ; | |
15 our @ISA = qw(Exporter) ; | |
16 our @EXPORT = qw( getChemNamesFromString getPeaksFromString ) ; | |
17 our %EXPORT_TAGS = ( ALL => [qw( getChemNamesFromString getPeaksFromString )] ) ; | |
18 | |
19 =head1 NAME | |
20 | |
21 lib::massbank_parser - Parser for MassBank records (files and record strings) | |
22 | |
23 =head1 SYNOPSIS | |
24 | |
25 use lib::massbank_parser ; | |
26 my $parser = lib::massbank_parser->new() ; | |
27 my $names = $parser->getChemNamesFromString( $record ) ; | |
28 | |
29 =head1 DESCRIPTION | |
30 | |
31 This module parses MassBank records, either from record files or from | |
32 record strings, and extracts instrument, method, spectrum, peak, link | |
33 and chemical-name fields. | |
34 | |
35 =head1 METHODS | |
36 | |
37 Methods are : | |
38 | |
39 =head2 METHOD new | |
40 | |
41 ## Description : new | |
42 ## Input : $self | |
43 ## Output : bless $self ; | |
44 ## Usage : new() ; | |
45 | |
46 =cut | |
47 | |
sub new {
    ## Description : constructor, builds an empty hash-based object
    ## Input       : the invocant (class name or existing object); may be
    ##               omitted when called as a plain function
    ## Output      : a blessed hash reference
    my ( $invocant ) = @_ ;

    ## Resolve the target class from a class name, an object, or fall back
    ## to this package for the historical "new()" function-style call.
    my $class = ref($invocant) || $invocant || __PACKAGE__ ;

    ## Two-argument bless so that subclasses get objects of their own class.
    my $self = {} ;
    return bless( $self, $class ) ;
}
54 ### END of SUB | |
55 | |
56 =head2 METHOD get_list_of_analysis_intrument_names | |
57 | |
58 ## Description : returns the list of unique names of the instruments used | |
59 ## Input : $dir, $ms_files (a list of files) | |
60 ## Output : $names | |
61 ## Usage : my ( $names ) = get_list_of_analysis_intrument_names( $ms_files ) ; | |
62 | |
63 =cut | |
64 ## START of SUB | |
sub get_list_of_analysis_intrument_names {
    ## Description : collect the distinct AC$INSTRUMENT values found in a set
    ##               of MassBank record files
    ## Input       : $dir (directory holding the files),
    ##               $ms_files (array ref of file names relative to $dir)
    ## Output      : array ref of instrument names, in order of first
    ##               appearance (each value is the raw text after the colon)
    ## Raises      : croaks if a file is undefined or does not exist,
    ##               dies if an existing file cannot be opened
    my ( $self, $dir, $ms_files ) = @_ ;

    ## Loaded here so the block does not depend on a file-level import.
    require File::Spec ;

    my ( %seen, @names ) ;
    foreach my $ms_file ( @{$ms_files} ) {
        ## Build the path portably instead of hard-coding a backslash.
        my $file = ( defined $dir and defined $ms_file )
            ? File::Spec->catfile( $dir, $ms_file )
            : undef ;
        unless ( ( defined $file ) and ( -e $file ) ) {
            croak "Can't work with a undef / none existing massbank file\n" ;
        }

        open( my $fh, '<', $file ) or die "Cant' read the file $file: $!\n" ;
        while ( my $field = <$fh> ) {
            chomp $field ;
            next unless $field =~ /AC\$INSTRUMENT:(.*)/ ;
            my $name = $1 ;
            ## An already known instrument ends the scan of this file.
            last if $seen{$name} ;
            $seen{$name} = 1 ;
            push( @names, $name ) ;
        }
        close($fh) ;
    }
    return( \@names ) ;
}
89 ## END of SUB | |
90 | |
91 =head2 METHOD get_analysis_instruments_data | |
92 | |
93 ## Description : permet de recuperer tous les champs d'un object massbank | |
94 ## Input : $ms_file | |
95 ## Output : $features | |
96 ## Usage : my ( $features ) = get_analysis_instruments_data( $ms_file ) ; | |
97 | |
98 =cut | |
99 ## START of SUB | |
sub get_analysis_instruments_data {
    ## Description : read the instrument name and type of a MassBank record file
    ## Input       : $ms_file (path of the record file)
    ## Output      : hash ref with keys 'name' and 'type' (undef when the
    ##               field is absent); an empty hash ref when neither field
    ##               was found
    ## Raises      : croaks if $ms_file is undefined or does not exist,
    ##               dies if the file cannot be opened
    my ( $self, $ms_file ) = @_ ;

    unless ( ( defined $ms_file ) and ( -e $ms_file ) ) {
        croak "Can't work with a undef / none existing massbank file\n" ;
    }

    my $control  = 0 ;
    my %features = ( 'name' => undef, 'type' => undef ) ;

    ## Three-argument open on a lexical handle: the file name can no longer
    ## be interpreted as an open mode, and no global handle is shared.
    open( my $fh, '<', $ms_file ) or die "Cant' read the file $ms_file: $!\n" ;
    while ( my $field = <$fh> ) {
        chomp $field ;
        if    ( $field =~ /AC\$INSTRUMENT: (.*)/ )      { $features{'name'} = $1 ; $control++ ; }
        elsif ( $field =~ /AC\$INSTRUMENT_TYPE: (.*)/ ) { $features{'type'} = $1 ; $control++ ; }
    }
    close($fh) ;

    ## Nothing matched: hand back an empty structure.
    if ( $control == 0 ) { %features = () ; }
    return( \%features ) ;
}
126 ## END of SUB | |
127 | |
128 =head2 METHOD get_ms_methods_data | |
129 | |
130 ## Description : permet de recuperer tous les champs d'un object massbank | |
131 ## Input : $ms_file | |
132 ## Output : $features | |
133 ## Usage : my ( $features ) = get_ms_methods_data( $ms_file ) ; | |
134 | |
135 =cut | |
136 ## START of SUB | |
sub get_ms_methods_data {
    ## Description : read the AC$MASS_SPECTROMETRY subtags of a MassBank record file
    ## Input       : $ms_file (path of the record file)
    ## Output      : hash ref with one key per supported subtag (undef when
    ##               absent); an empty hash ref when no subtag was found.
    ##               Values are the raw text following the subtag name.
    ## Raises      : croaks if $ms_file is undefined or does not exist,
    ##               dies if the file cannot be opened
    my ( $self, $ms_file ) = @_ ;

    unless ( ( defined $ms_file ) and ( -e $ms_file ) ) {
        croak "Can't work with a undef / none existing massbank file\n" ;
    }

    ## Ordered list: the first matching pattern wins for a given line.
    ## ION_MODE / MS_TYPE accept an optional colon after the subtag so that
    ## both "MS_TYPE MS2" and the "MS_TYPE: MS2" spelling are recognised.
    my @subtags = (
        [ 'ion_mode',                qr/AC\$MASS_SPECTROMETRY: ION_MODE:?(.*)/ ],              # mandatory
        [ 'ms_type',                 qr/AC\$MASS_SPECTROMETRY: MS_TYPE:?(.*)/ ],               # mandatory
        [ 'collision_energy',        qr/AC\$MASS_SPECTROMETRY: COLLISION_ENERGY(.*)/ ],        # optional
        [ 'collision_gas',           qr/AC\$MASS_SPECTROMETRY: COLLISION_GAS(.*)/ ],           # optional
        [ 'desolvation_gas_flow',    qr/AC\$MASS_SPECTROMETRY: DESOLVATION_GAS_FLOW(.*)/ ],    # optional
        [ 'desolvation_temperature', qr/AC\$MASS_SPECTROMETRY: DESOLVATION_TEMPERATURE(.*)/ ], # optional
        [ 'ionization_energy',       qr/AC\$MASS_SPECTROMETRY: IONIZATION_ENERGY(.*)/ ],       # optional
        [ 'laser',                   qr/AC\$MASS_SPECTROMETRY: LASER(.*)/ ],                   # optional
        [ 'matrix',                  qr/AC\$MASS_SPECTROMETRY: MATRIX(.*)/ ],                  # optional
        [ 'mass_accuracy',           qr/AC\$MASS_SPECTROMETRY: MASS_ACCURACY(.*)/ ],           # optional
        [ 'reagent_gas',             qr/AC\$MASS_SPECTROMETRY: REAGENT_GAS(.*)/ ],             # optional
        [ 'scanning',                qr/AC\$MASS_SPECTROMETRY: SCANNING(.*)/ ],                # optional
    ) ;
    my %features = map { ( $_->[0], undef ) } @subtags ;
    my $control  = 0 ;

    open( my $fh, '<', $ms_file ) or die "Cant' read the file $ms_file: $!\n" ;
    LINE: while ( my $field = <$fh> ) {
        chomp $field ;
        foreach my $subtag (@subtags) {
            my ( $key, $pattern ) = @{$subtag} ;
            next unless $field =~ $pattern ;
            $features{$key} = $1 ;
            $control++ ;
            next LINE ;
        }
    }
    close($fh) ;

    ## Empty the structure when nothing was found.
    if ( $control == 0 ) { %features = () ; }
    return( \%features ) ;
}
184 ## END of SUB | |
185 | |
186 =head2 METHOD get_solvents_data | |
187 | |
188 ## Description : permet de recuperer tous les champs d'un object massbank | |
189 ## Input : $ms_file | |
190 ## Output : $features | |
191 ## Usage : my ( $features ) = get_solvents_data( $ms_file ) ; | |
192 | |
193 =cut | |
194 ## START of SUB | |
sub get_solvents_data {
    ## Description : list the AC$CHROMATOGRAPHY SOLVENT entries of a MassBank record file
    ## Input       : $ms_file (path of the record file)
    ## Output      : array ref of strings, each 'Solvent ' followed by the raw
    ##               text found after the SOLVENT subtag
    ## Raises      : croaks if $ms_file is undefined or does not exist,
    ##               dies if the file cannot be opened
    my ( $self, $ms_file ) = @_ ;

    unless ( ( defined $ms_file ) and ( -e $ms_file ) ) {
        croak "Can't work with a undef / none existing massbank file\n" ;
    }

    my @features = () ;
    ## Three-argument open on a lexical handle (no mode injection through
    ## the file name, no shared global handle).
    open( my $fh, '<', $ms_file ) or die "Cant' read the file $ms_file: $!\n" ;
    while ( my $field = <$fh> ) {
        chomp $field ;
        if ( $field =~ /AC\$CHROMATOGRAPHY: SOLVENT(.*)/ ) {
            push( @features, 'Solvent ' . $1 ) ;
        }
    }
    close($fh) ;
    return( \@features ) ;
}
215 ## END of SUB | |
216 | |
217 =head2 METHOD get_sample_data | |
218 | |
219 ## Description : permet de recuperer tous les champs d'un object massbank | |
220 ## Input : $ms_file | |
221 ## Output : $features | |
222 ## Usage : my ( $features ) = get_sample_data( $ms_file ) ; | |
223 | |
224 =cut | |
225 ## START of SUB | |
sub get_sample_data {
    ## Description : read the SP$SAMPLE field of a MassBank record file
    ## Input       : $ms_file (path of the record file)
    ## Output      : hash ref with key 'sample_type' holding the raw text
    ##               found after 'SP$SAMPLE' (last occurrence wins); an empty
    ##               hash ref when the field is absent
    ## Raises      : croaks if $ms_file is undefined or does not exist,
    ##               dies if the file cannot be opened
    my ( $self, $ms_file ) = @_ ;

    unless ( ( defined $ms_file ) and ( -e $ms_file ) ) {
        croak "Can't work with a undef / none existing massbank file\n" ;
    }

    my $control  = 0 ;
    my %features = ( 'sample_type' => undef ) ;

    ## Three-argument open on a lexical handle instead of the global MS handle.
    open( my $fh, '<', $ms_file ) or die "Cant' read the file $ms_file: $!\n" ;
    while ( my $field = <$fh> ) {
        chomp $field ;
        if ( $field =~ /SP\$SAMPLE(.*)/ ) {
            $features{'sample_type'} = $1 ;
            $control++ ;
        }
    }
    close($fh) ;

    if ( $control == 0 ) { %features = () ; }
    return( \%features ) ;
}
250 ## END of SUB | |
251 | |
252 =head2 METHOD get_chromato_methods_data | |
253 | |
254 ## Description : permet de recuperer tous les champs d'un object massbank | |
255 ## Input : $ms_file | |
256 ## Output : $features | |
257 ## Usage : my ( $features ) = get_chromato_methods_data( $ms_file ) ; | |
258 | |
259 =cut | |
260 ## START of SUB | |
sub get_chromato_methods_data {
    ## Description : read the AC$CHROMATOGRAPHY subtags of a MassBank record file
    ## Input       : $ms_file (path of the record file)
    ## Output      : hash ref keyed by the lower-cased subtag name (undef when
    ##               absent); an empty hash ref when no subtag was found
    ## Raises      : croaks if $ms_file is undefined or does not exist,
    ##               dies if the file cannot be opened
    my ( $self, $ms_file ) = @_ ;

    unless ( ( defined $ms_file ) and ( -e $ms_file ) ) {
        croak "Can't work with a undef / none existing massbank file\n" ;
    }

    ## Supported subtags; the result key is the lower-cased tag.
    my @tags = qw(
        CAPILLARY_VOLTAGE COLUMN_NAME COLUMN_TEMPERATURE
        FLOW_GRADIENT FLOW_RATE RETENTION_TIME
    ) ;
    my %features = map { ( lc($_), undef ) } @tags ;
    my $control  = 0 ;

    ## Three-argument open on a lexical handle instead of the global MS handle.
    open( my $fh, '<', $ms_file ) or die "Cant' read the file $ms_file: $!\n" ;
    LINE: while ( my $field = <$fh> ) {
        chomp $field ;
        foreach my $tag (@tags) {
            next unless $field =~ /AC\$CHROMATOGRAPHY: ${tag} (.*)/ ;
            $features{ lc($tag) } = $1 ;
            $control++ ;
            next LINE ;
        }
    }
    close($fh) ;

    if ( $control == 0 ) { %features = () ; }
    return( \%features ) ;
}
296 ## END of SUB | |
297 | |
298 =head2 METHOD get_analytical_conditions_data | |
299 | |
300 ## Description : permet de recuperer tous les champs d'un object massbank .. for massbank version < 2.0 | |
301 ## Input : $ms_file | |
302 ## Output : $features | |
303 ## Usage : my ( $features ) = get_analytical_conditions_data( $ms_file ) ; | |
304 | |
305 =cut | |
306 ## START of SUB | |
sub get_analytical_conditions_data {
    ## Description : read the AC$ANALYTICAL_CONDITION subtags of a MassBank
    ##               record file (format used by massbank version < 2.0) and
    ##               sort them into chromatography and mass-spectrometry groups
    ## Input       : $ms_file (path of the record file)
    ## Output      : a list of two hash refs ( \%chromato, \%ms ), each keyed
    ##               by the lower-cased subtag name (undef when absent); a
    ##               group with no match at all is returned as an empty hash
    ## Raises      : croaks if $ms_file is undefined or does not exist,
    ##               dies if the file cannot be opened
    my ( $self, $ms_file ) = @_ ;

    unless ( ( defined $ms_file ) and ( -e $ms_file ) ) {
        croak "Can't work with a undef / none existing massbank file\n" ;
    }

    my @chrom_tags = qw(
        CAPILLARY_VOLTAGE COLUMN_NAME COLUMN_TEMPERATURE
        FLOW_GRADIENT FLOW_RATE RETENTION_TIME
    ) ;
    my @ms_tags = qw(
        ION_MODE MS_TYPE COLLISION_ENERGY COLLISION_GAS DESOLVATION_GAS_FLOW
        DESOLVATION_TEMPERATURE IONIZATION_ENERGY LASER MATRIX MASS_ACCURACY
        REAGENT_GAS SCANNING
    ) ;
    my %features_chrom = map { ( lc($_), undef ) } @chrom_tags ;
    my %features_ms    = map { ( lc($_), undef ) } @ms_tags ;
    my ( $control_chrom, $control_ms ) = ( 0, 0 ) ;

    ## Chromatography tags are tried before mass-spectrometry tags.
    my @groups = (
        [ \@chrom_tags, \%features_chrom, \$control_chrom ],
        [ \@ms_tags,    \%features_ms,    \$control_ms ],
    ) ;

    open( my $fh, '<', $ms_file ) or die "Cant' read the file $ms_file: $!\n" ;
    LINE: while ( my $field = <$fh> ) {
        chomp $field ;
        foreach my $group (@groups) {
            my ( $tags, $features, $counter ) = @{$group} ;
            foreach my $tag ( @{$tags} ) {
                ## Same pattern for every tag: the separating space is never
                ## part of the captured value (COLUMN_TEMPERATURE included).
                next unless $field =~ /AC\$ANALYTICAL_CONDITION: ${tag} (.*)/ ;
                $features->{ lc($tag) } = $1 ;
                ${$counter}++ ;
                next LINE ;
            }
        }
    }
    close($fh) ;

    if ( $control_ms == 0 )    { %features_ms    = () ; }
    if ( $control_chrom == 0 ) { %features_chrom = () ; }
    return( \%features_chrom, \%features_ms ) ;
}
371 ## END of SUB | |
372 | |
373 =head2 METHOD get_spectrums_data | |
374 | |
375 ## Description : permet de recuperer tous les champs d'un object massbank | |
376 ## Input : $ms_file | |
377 ## Output : $features | |
378 ## Usage : my ( $features ) = get_spectrums_data( $ms_file ) ; | |
379 | |
380 =cut | |
381 ## START of SUB | |
sub get_spectrums_data {
    ## Description : read the focused-ion fields and the peak count of a
    ##               MassBank record file
    ## Input       : $ms_file (path of the record file)
    ## Output      : hash ref with keys 'ion_type', 'precursor_mz',
    ##               'precursor_type' (raw text after the subtag name) and
    ##               'num_peaks'; undef for absent fields, and an empty hash
    ##               ref when none of them was found
    ## Raises      : croaks if $ms_file is undefined or does not exist,
    ##               dies if the file cannot be opened
    my ( $self, $ms_file ) = @_ ;

    unless ( ( defined $ms_file ) and ( -e $ms_file ) ) {
        croak "Can't work with a undef / none existing massbank file\n" ;
    }

    ## Ordered list: the first matching pattern wins for a given line.
    my @fields = (
        [ 'ion_type',       qr/MS\$FOCUSED_ION: ION_TYPE(.*)/ ],
        [ 'precursor_mz',   qr/MS\$FOCUSED_ION: PRECURSOR_M\/Z(.*)/ ],
        [ 'precursor_type', qr/MS\$FOCUSED_ION: PRECURSOR_TYPE(.*)/ ],
        [ 'num_peaks',      qr/PK\$NUM_PEAK: (.*)/ ],
    ) ;
    my %features = map { ( $_->[0], undef ) } @fields ;
    my $control  = 0 ;

    ## Three-argument open on a lexical handle instead of the global MS handle.
    open( my $fh, '<', $ms_file ) or die "Cant' read the file $ms_file: $!\n" ;
    LINE: while ( my $field = <$fh> ) {
        chomp $field ;
        foreach my $entry (@fields) {
            my ( $key, $pattern ) = @{$entry} ;
            next unless $field =~ $pattern ;
            $features{$key} = $1 ;
            $control++ ;
            next LINE ;
        }
    }
    close($fh) ;

    if ( $control == 0 ) { %features = () ; }
    return( \%features ) ;
}
412 ## END of SUB | |
413 | |
414 =head2 METHOD get_peaks_data | |
415 | |
416 ## Description : permet de recuperer tous les champs d'un object massbank | |
417 ## Input : $ms_file | |
418 ## Output : $features | |
419 ## Usage : my ( $features ) = get_peaks_data( $ms_file ) ; | |
420 | |
421 =cut | |
422 ## START of SUB | |
sub get_peaks_data {
    ## Description : read the peak table of a MassBank record file
    ## Input       : $ms_file (path of the record file)
    ## Output      : array ref of hash refs with keys 'mz', 'intensity' and
    ##               'relative_intensity', one per peak line found after the
    ##               'PK$PEAK: m/z int. rel.int.' header
    ## Raises      : croaks if $ms_file is undefined or does not exist,
    ##               dies if the file cannot be opened
    my ( $self, $ms_file ) = @_ ;

    unless ( ( defined $ms_file ) and ( -e $ms_file ) ) {
        croak "Can't work with a undef / none existing massbank file\n" ;
    }

    ## An integer or decimal number (covers every mz / intensity layout).
    my $number = qr/\d+(?:\.\d+)?/ ;

    my @features = () ;
    my $peaks    = 0 ;    # becomes 1 once the peak header has been seen

    open( my $fh, '<', $ms_file ) or die "Cant' read the file $ms_file: $!\n" ;
    while ( my $field = <$fh> ) {
        chomp $field ;
        if ( $field =~ /PK\$PEAK: m\/z int\. rel\.int\./ ) { $peaks = 1 ; next ; }
        next unless $peaks == 1 ;

        ## mz, intensity (optionally with an exponent), relative intensity.
        next unless $field =~ /\s+($number)\s+($number)(?:[eE]([+-]?\d+))?\s+(\d+)/ ;
        my ( $mz, $mantissa, $exponent, $relative ) = ( $1, $2, $3, $4 ) ;

        ## Exponent notation is expanded to a plain number, as done for
        ## record strings in getPeaksFromString.
        my $intensity = ( defined $exponent ) ? $mantissa * 10**$exponent : $mantissa ;
        push( @features, { 'mz' => $mz, 'intensity' => $intensity, 'relative_intensity' => $relative } ) ;
    }
    close($fh) ;
    return( \@features ) ;
}
466 ## END of SUB | |
467 | |
468 =head2 METHOD getPeaksFromString | |
469 | |
470 ## Description : permet de recuperer la data peaks d'un record handler massbank | |
471 ## Input : $record | |
472 ## Output : $features | |
473 ## Usage : my ( $features ) = getPeaksFromString( $record ) ; | |
474 | |
475 =cut | |
476 ## START of SUB | |
sub getPeaksFromString {
    ## Description : read the peak table of a MassBank record given as a string
    ## Input       : $record (full text of the record)
    ## Output      : array ref of hash refs with keys 'mz', 'intensity' and
    ##               'relative_intensity', one per peak line found after the
    ##               'PK$PEAK: m/z int. rel.int.' header
    ## Raises      : croaks if $record is undefined
    my ( $self, $record ) = @_ ;

    unless ( defined $record ) {
        croak "Can't work with a undef / none existing massbank handler\n" ;
    }

    ## An integer or decimal number (covers every mz / intensity layout).
    my $number = qr/\d+(?:\.\d+)?/ ;

    my @features = () ;
    my $peaks    = 0 ;    # becomes 1 once the peak header has been seen

    foreach my $field ( split( /\n/, $record ) ) {
        if ( $field =~ /PK\$PEAK: m\/z int\. rel\.int\./ ) { $peaks = 1 ; next ; }
        next unless $peaks == 1 ;

        ## mz, intensity (optionally with an exponent), relative intensity.
        next unless $field =~ /\s+($number)\s+($number)(?:[eE]([+-]?\d+))?\s+(\d+)/ ;
        my ( $mz, $mantissa, $exponent, $relative ) = ( $1, $2, $3, $4 ) ;

        ## x.xxxeN means mantissa * 10**N (not mantissa * 10 * N).
        my $intensity = ( defined $exponent ) ? $mantissa * 10**$exponent : $mantissa ;
        push( @features, { 'mz' => $mz, 'intensity' => $intensity, 'relative_intensity' => $relative } ) ;
    }
    return( \@features ) ;
}
523 ## END of SUB | |
524 | |
525 =head2 METHOD getIdFromString | |
526 | |
527 ## Description : get the accession id of massbank record | |
528 ## Input : $record | |
529 ## Output : $id | |
530 ## Usage : my ( $id ) = getIdFromString ( $record ) ; | |
531 | |
532 =cut | |
533 ## START of SUB | |
sub getIdFromString {
    ## Description : extract the ACCESSION value of a MassBank record string
    ## Input       : $record (full text of the record)
    ## Output      : the text following 'ACCESSION:' on the last matching
    ##               line, or undef when no line matches
    ## Raises      : croaks if $record is undefined
    my ( $self, $record ) = @_ ;

    croak "Can't work with a undef / none existing massbank handler\n"
        unless defined $record ;

    my $id ;
    for my $line ( split /\n/, $record ) {
        $id = $1 if $line =~ /ACCESSION:\s+(.+)/ ;
    }
    return $id ;
}
555 ### END of SUB | |
556 | |
557 | |
558 | |
559 =head2 METHOD getInstrumentTypeFromString | |
560 | |
561 ## Description : get the instrument type of massbank record | |
562 ## Input : $record | |
563 ## Output : $instrumentType | |
564 ## Usage : my ( $instrumentType ) = getInstrumentTypeFromString ( $record ) ; | |
565 | |
566 =cut | |
567 ## START of SUB | |
sub getInstrumentTypeFromString {
    ## Description : extract the INSTRUMENT_TYPE value of a MassBank record string
    ## Input       : $record (full text of the record)
    ## Output      : the text following 'INSTRUMENT_TYPE:' on the last
    ##               matching line, or undef when no line matches
    ## Raises      : croaks if $record is undefined
    my ( $self, $record ) = @_ ;

    croak "Can't work with a undef / none existing massbank handler\n"
        unless defined $record ;

    my $instrument_type ;
    for my $line ( split /\n/, $record ) {
        $instrument_type = $1 if $line =~ /INSTRUMENT_TYPE:\s+(.+)/ ;
    }
    return $instrument_type ;
}
589 ### END of SUB | |
590 | |
591 =head2 METHOD getFormulaFromString | |
592 | |
593 ## Description : get the elementar formula of massbank record | |
594 ## Input : $record | |
595 ## Output : $formula | |
596 ## Usage : my ( $formula ) = getFormulaFromString ( $record ) ; | |
597 | |
598 =cut | |
599 ## START of SUB | |
sub getFormulaFromString {
    ## Description : extract the CH$FORMULA value of a MassBank record string
    ## Input       : $record (full text of the record)
    ## Output      : the text following 'CH$FORMULA:' on the last matching
    ##               line, or undef when no line matches
    ## Raises      : croaks if $record is undefined
    my ( $self, $record ) = @_ ;

    croak "Can't work with a undef / none existing massbank handler\n"
        unless defined $record ;

    my $formula ;
    for my $line ( split /\n/, $record ) {
        $formula = $1 if $line =~ /CH\$FORMULA:\s+(.+)/ ;
    }
    return $formula ;
}
621 ### END of SUB | |
622 | |
623 =head2 METHOD getInchiFromString | |
624 | |
625 ## Description : get the IUPAC InCHi of massbank record | |
626 ## Input : $record | |
627 ## Output : $inchi | |
628 ## Usage : my ( $inchi ) = getInchiFromString ( $record ) ; | |
629 | |
630 =cut | |
631 ## START of SUB | |
sub getInchiFromString {
    ## Description : extract the CH$IUPAC value of a MassBank record string
    ## Input       : $record (full text of the record)
    ## Output      : the text following 'CH$IUPAC:' on the last matching
    ##               line, or undef when no line matches
    ## Raises      : croaks if $record is undefined
    my ( $self, $record ) = @_ ;

    croak "Can't work with a undef / none existing massbank handler\n"
        unless defined $record ;

    my $inchi ;
    for my $line ( split /\n/, $record ) {
        $inchi = $1 if $line =~ /CH\$IUPAC:\s+(.+)/ ;
    }
    return $inchi ;
}
653 ### END of SUB | |
654 | |
655 =head2 METHOD getExactMzFromString | |
656 | |
657 ## Description : get the exact mass of massbank record | |
658 ## Input : $record | |
659 ## Output : $exactMass | |
660 ## Usage : my ( $exactMass ) = getExactMzFromString ( $record ) ; | |
661 | |
662 =cut | |
663 ## START of SUB | |
sub getExactMzFromString {
    ## Description : extract the CH$EXACT_MASS value of a MassBank record string
    ## Input       : $record (full text of the record)
    ## Output      : the text following 'CH$EXACT_MASS:' on the last matching
    ##               line, or undef when no line matches
    ## Raises      : croaks if $record is undefined
    my ( $self, $record ) = @_ ;

    croak "Can't work with a undef / none existing massbank handler\n"
        unless defined $record ;

    my $exact_mass ;
    for my $line ( split /\n/, $record ) {
        $exact_mass = $1 if $line =~ /CH\$EXACT_MASS:\s+(.+)/ ;
    }
    return $exact_mass ;
}
685 ### END of SUB | |
686 | |
687 | |
688 =head2 METHOD getPrecursorTypeFromString | |
689 | |
690 ## Description : get the precursor type of massbank record | |
691 ## Input : $record | |
692 ## Output : $precursorType | |
693 ## Usage : my ( $precursorType ) = getPrecursorTypeFromString ( $record ) ; | |
694 | |
695 =cut | |
696 ## START of SUB | |
sub getPrecursorTypeFromString {
    ## Description : determine the precursor type of a MassBank record string
    ## Input       : $record (full text of the record)
    ## Output      : in order of preference
    ##                 1. the text following 'PRECURSOR_TYPE' (first occurrence),
    ##                 2. the text following 'ION_TYPE' (first occurrence),
    ##                 3. the last ';'-separated item of RECORD_TITLE, with
    ##                    all whitespace removed,
    ##                 4. the string 'NA'
    ## Raises      : croaks if $record is undefined
    my ( $self, $record ) = @_ ;

    unless ( defined $record ) {
        croak "Can't work with a undef / none existing massbank handler\n" ;
    }

    my ( $from_precursor, $from_ion, $from_title ) ;
    foreach my $field ( split( /\n/, $record ) ) {
        if ( $field =~ /RECORD_TITLE:\s+(.+)/ ) {
            my @title = split( /;/, $1 ) ;
            ## A title made only of separators yields no usable item.
            if (@title) {
                $from_title = $title[-1] ;
                $from_title =~ s/\s//g ;
            }
        }
        if ( $field =~ /PRECURSOR_TYPE(.+)/ ) {
            ## Highest priority: nothing read later can change the answer.
            $from_precursor = $1 ;
            last ;
        }
        ## Keep the first ION_TYPE but go on reading: a PRECURSOR_TYPE line
        ## placed after it must still take precedence.
        if ( ( !defined $from_ion ) and ( $field =~ /ION_TYPE(.+)/ ) ) {
            $from_ion = $1 ;
        }
    }

    ## manage undef precursor/ion type field
    return $from_precursor if defined $from_precursor ;
    return $from_ion       if defined $from_ion ;
    return $from_title     if defined $from_title ;
    return 'NA' ;
}
750 ### END of SUB | |
751 | |
752 =head2 METHOD getMsTypeFromString | |
753 | |
754 ## Description : get the MS type of massbank record | |
755 ## Input : $record | |
756 ## Output : $msType | |
757 ## Usage : my ( $msType ) = getMsTypeFromString ( $record ) ; | |
758 | |
759 =cut | |
760 ## START of SUB | |
sub getMsTypeFromString {
    ## Description : extract the MS_TYPE value of a MassBank record string
    ## Input       : $record (full text of the record)
    ## Output      : the text following 'AC$MASS_SPECTROMETRY: MS_TYPE' on
    ##               the last matching line, or undef when no line matches
    ## Raises      : croaks if $record is undefined
    my ( $self, $record ) = @_ ;

    croak "Can't work with a undef / none existing massbank handler\n"
        unless defined $record ;

    my $ms_type ;
    for my $line ( split /\n/, $record ) {
        $ms_type = $1 if $line =~ /AC\$MASS_SPECTROMETRY:\s+MS_TYPE\s+(.+)/ ;
    }
    return $ms_type ;
}
782 ### END of SUB | |
783 | |
784 =head2 METHOD getChemNamesFromString | |
785 | |
786 ## Description : get list of names of a massbank record | |
787 ## Input : $record | |
788 ## Output : $names | |
789 ## Usage : my ( $names ) = getChemNamesFromString( $record ) ; | |
790 | |
791 =cut | |
792 ## START of SUB | |
sub getChemNamesFromString {
    ## Description : list the CH$NAME values of a MassBank record string
    ## Input       : $record (full text of the record)
    ## Output      : array ref with the text following 'CH$NAME: ' for every
    ##               matching line, in record order
    ## Raises      : croaks if $record is undefined
    my ( $self, $record ) = @_ ;

    croak "Can't work with a undef / none existing massbank record (string)\n"
        unless defined $record ;

    ## One entry per matching line, nothing for the other lines.
    my @names = map { /CH\$NAME: (.*)/ ? ($1) : () } split( /\n/, $record ) ;
    return \@names ;
}
812 ## END of SUB | |
813 | |
814 | |
815 | |
816 | |
817 | |
818 =head2 METHOD getMassBankHandler | |
819 | |
820 ## Description : get a massbank handler from a file | |
821 ## Input : $record | |
822 ## Output : $massbankHandler | |
823 ## Usage : my ( $massbankHandler ) = getMassBankHandler ( $record ) ; | |
824 | |
825 =cut | |
826 ## START of SUB | |
sub getMassBankHandler {
    ## Description : placeholder, not implemented yet
    ## Input       : $record (currently unused)
    ## Output      : always undef
    my ( $self, $record ) = @_ ;

    ## TODO...
    my $massbankHandler = undef ;

    return $massbankHandler ;
}
837 ### END of SUB | |
838 | |
839 =head2 METHOD get_annotations_data | |
840 | |
841 ## Description : permet de recuperer tous les champs d'un object massbank | |
842 ## Input : $ms_file | |
843 ## Output : $features | |
844 ## Usage : my ( $features ) = get_annotations_data( $ms_file ) ; | |
845 | |
846 =cut | |
847 ## START of SUB | |
sub get_annotations_data {
    ## Description : list the PK$ANNOTATION entries of a MassBank record file
    ## Input       : $ms_file (path of the record file)
    ## Output      : array ref with the raw text following 'PK$ANNOTATION:'
    ##               for every matching line
    ## Raises      : croaks if $ms_file is undefined or does not exist,
    ##               dies if the file cannot be opened
    my ( $self, $ms_file ) = @_ ;

    unless ( ( defined $ms_file ) and ( -e $ms_file ) ) {
        croak "Can't work with a undef / none existing massbank file\n" ;
    }

    my @features = () ;
    ## Three-argument open on a lexical handle instead of the global MS handle.
    open( my $fh, '<', $ms_file ) or die "Cant' read the file $ms_file: $!\n" ;
    while ( my $field = <$fh> ) {
        chomp $field ;
        if ( $field =~ /PK\$ANNOTATION:(.*)/ ) { push( @features, $1 ) ; }
    }
    close($fh) ;
    return( \@features ) ;
}
869 ## END of SUB | |
870 | |
871 =head2 METHOD get_links_data | |
872 | |
873 ## Description : permet de recuperer tous les champs d'un object massbank | |
874 ## Input : $ms_file | |
875 ## Output : $features | |
876 ## Usage : my ( $features ) = get_links_data( $ms_file ) ; | |
877 | |
878 =cut | |
879 ## START of SUB | |
sub get_links_data {
    ## Description : collect the CH$LINK cross-references of a MassBank record file
    ## Input       : $ms_file (path of the record file)
    ## Output      : hash ref with keys 'CAS', 'KEGG' and 'PUBCHEM', each an
    ##               array ref of the identifiers found (possibly empty)
    ## Raises      : croaks if $ms_file is undefined or does not exist,
    ##               dies if the file cannot be opened
    my ( $self, $ms_file ) = @_ ;

    unless ( ( defined $ms_file ) and ( -e $ms_file ) ) {
        croak "Can't work with a undef / none existing massbank file\n" ;
    }

    my ( @CAS, @KEGG, @PUBCHEM ) ;

    ## Three-argument open on a lexical handle instead of the global MS handle.
    open( my $fh, '<', $ms_file ) or die "Cant' read the file $ms_file: $!\n" ;
    while ( my $field = <$fh> ) {
        chomp $field ;
        if    ( $field =~ /CH\$LINK: CAS (.*)/ )         { push( @CAS,     $1 ) ; }
        elsif ( $field =~ /CH\$LINK: KEGG (.*)/ )        { push( @KEGG,    $1 ) ; }
        elsif ( $field =~ /CH\$LINK: PUBCHEM CID (.*)/ ) { push( @PUBCHEM, $1 ) ; }
        ## others !!?
    }
    close($fh) ;

    my %features = (
        'CAS'     => \@CAS,
        'KEGG'    => \@KEGG,
        'PUBCHEM' => \@PUBCHEM,
    ) ;
    return( \%features ) ;
}
914 ## END of SUB | |
915 | |
916 =head2 METHOD get_ms_record_links_data | |
917 | |
918 ## Description : permet de recuperer tous les champs d'un object massbank | |
919 ## Input : $ms_file | |
920 ## Output : $features | |
921 ## Usage : my ( $features ) = get_ms_record_links_data( $ms_file ) ; | |
922 | |
923 =cut | |
924 ## START of SUB | |
sub get_ms_record_links_data {
    ## Description : derive the accession and the source database of a record
    ##               from its file name
    ## Input       : $ms_file (path or name of the record file, '.txt' suffix
    ##               optional)
    ## Output      : hash ref with keys 'accession' (trailing word characters
    ##               of the file name) and 'name' ('RESPECT', 'MASSBANK' or
    ##               undef for an unknown prefix); an empty hash ref when no
    ##               accession can be extracted
    ## Note        : emits a warning (carp) when the accession prefix belongs
    ##               to neither reference list
    my ( $self, $ms_file ) = @_ ;

    ## Internal reference for MASSBANK and RESPECT: accession prefix => source.
    my %source_of = (
        ( map { ( $_, 'RESPECT' ) } qw( PS PT PM ) ),
        ( map { ( $_, 'MASSBANK' ) } qw(
            TUE GLS AU MSJ ML FIO UF CO UO TT OUF MCH NU KNA MT
            CE KO KZ JEL JP PR BML CA TY PB FU EA UT BSU WA
        ) ),
    ) ;

    return( {} ) unless $ms_file ;

    ## keep only record id (0001-PS0002 => PS0002 or BJ0045 => BJ0045)
    my $stem = basename( "$ms_file", ".txt" ) ;
    return( {} ) unless $stem =~ /(\w+)$/ ;

    my %db = ( 'accession' => $1, 'name' => undef ) ;
    if ( $db{'accession'} =~ /(\D+)(\d+)/ ) {
        my $key = $1 ;
        if ( exists $source_of{$key} ) {
            $db{'name'} = $source_of{$key} ;
        }
        else {
            carp "The following key ($key) for $db{'accession'} has an unknown reference (not a Massbank or ReSpect source)\n" ;
        }
    }
    return( \%db ) ;
}
956 ## END of SUB | |
957 | |
958 | |
959 1 ; | |
960 | |
961 | |
962 __END__ | |
963 | |
964 =head1 SUPPORT | |
965 | |
966 You can find documentation for this module with the perldoc command. | |
967 | |
968 perldoc parser::chem::massbank.pm | |
969 | |
970 =head1 Exports | |
971 | |
972 =over 4 | |
973 | |
974 =item :ALL is ... | |
975 | |
976 =back | |
977 | |
978 =head1 AUTHOR | |
979 | |
980 Franck Giacomoni E<lt>franck.giacomoni@clermont.inra.frE<gt> | |
981 | |
982 =head1 LICENSE | |
983 | |
984 This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. | |
985 | |
986 =head1 VERSION | |
987 | |
988 version 1 : 25 / 06 / 2013 | |
989 | |
990 version 2 : ?? | |
991 | |
992 =cut |