comparison golm_ws_lib_search.pl @ 0:e3d43b8c987b draft

Init repository with last tool-bank-golm-lib_search master version
author fgiacomoni
date Mon, 05 Dec 2016 08:32:04 -0500
parents
children 11779b6402bc
comparison
equal deleted inserted replaced
-1:000000000000 0:e3d43b8c987b
1 #!perl
2
3 ## script : XXX.pl
4 #=============================================================================
5 # Included modules and versions
6 #=============================================================================
7 ## Perl modules
8 use strict ;
9 use warnings ;
10 use Carp qw (cluck croak carp) ;
11
12 use Data::Dumper ;
13 use Getopt::Long ;
14 use FindBin ; ## Allows you to locate the directory of original perl script
15
16 ## Specific Perl Modules (PFEM)
17 use lib $FindBin::Bin ;
18 my $binPath = $FindBin::Bin ;
19 use JSON ;
20
21 ## Dedicate Perl Modules PFEM
22 use lib::golm_ws_api qw( :ALL ) ;
23 use lib::msp qw( :ALL ) ;
24 use lib::output qw( :ALL ) ;
25 use lib::conf qw( :ALL ) ;
26
## Initialized values
## Command-line options: every variable stays undef until GetOptions fills it in.
my ($OptHelp, $ri, $riWindow, $gcColumn, $inputFile, $inputMasses) ;
my ($maxHits, $mzRes, $maxIons, $threshold, $relative, $noise_threshold) ;
my ($JaccardDistanceThreshold, $s12GowerLegendreDistanceThreshold) ;
my ($DotproductDistanceThreshold, $HammingDistanceThreshold, $EuclideanDistanceThreshold) ;
my ($excel_file, $html_file, $html_template, $json_file, $csv_file) ;
## Containers filled later in the script
my (@hits, @ojson) ;
my $encoded_spectra ;

## if you put no arguments, function help is started
if (!@ARGV) { help() ; }

#=============================================================================
#                                Manage EXCEPTIONS
#=============================================================================
## NOTE(review): the help text advertises -ri as "float or integer" but the
## option is parsed with ":i" (integer only) -- confirm which one is intended.
## The return value of GetOptions is deliberately not checked, as before, so an
## unknown option only produces a warning instead of aborting the run.
GetOptions (
    "help|h"                              => \$OptHelp,       # HELP
    "inputFile:s"                         => \$inputFile,
    "inputMasses:s"                       => \$inputMasses,
    "ri:i"                                => \$ri,
    "riWindow:i"                          => \$riWindow,
    "gcColumn:s"                          => \$gcColumn,
    "maxHits:i"                           => \$maxHits,
    "mzRes:i"                             => \$mzRes,
    "maxIons:i"                           => \$maxIons,
    #"noiseThreshold:f"                   => \$noise_threshold,
    "JaccardDistanceThreshold:f"          => \$JaccardDistanceThreshold,
    "s12GowerLegendreDistanceThreshold:f" => \$s12GowerLegendreDistanceThreshold,
    "DotproductDistanceThreshold:f"       => \$DotproductDistanceThreshold,
    "HammingDistanceThreshold:f"          => \$HammingDistanceThreshold,
    "EuclideanDistanceThreshold:f"        => \$EuclideanDistanceThreshold,
    "relative:s"                          => \$relative,
    "output_xls:s"                        => \$excel_file,
    "output_html:s"                       => \$html_file,
    "output_json:s"                       => \$json_file,
    "output_tabular:s"                    => \$csv_file,
) ;

## if you put the option -help or -h function help is started
## (handled before any validation so that asking for help never triggers a
## validation error or an "uninitialized value" warning)
if (defined $OptHelp) { help() ; }

## Numeric sanity checks, applied only to the options that were actually given:
## comparing an undefined option would just raise a warning and pass anyway.
die "maxHits must be >= 0\n" if (defined $maxHits and $maxHits < 0) ;
die "mzRes must be >= 0 \n"  if (defined $mzRes   and $mzRes   < 0) ;
die "maxIons must be >= 0\n" if (defined $maxIons and $maxIons < 0) ;
#die "noiseThreshold must be > 0\n" if (defined $noise_threshold and $noise_threshold <= 0) ;

## At least one input source (msp file or mass/intensity string) is mandatory
if ( !defined $inputFile and !defined $inputMasses ) {
    warn "The input data is not defined (File or mass/intensity list AS string)\n" ;
    help() ;
}
74
75 #=============================================================================
76 # MAIN SCRIPT
77 #=============================================================================
78
## Create module objects ###

my $oapi     = lib::golm_ws_api->new() ;
my $omsp     = lib::msp->new() ;
my $o_output = lib::output->new() ;
my $oConf    = lib::conf->new() ;


## -------------- Conf file ------------------------ :
## bsd_glob is used instead of the <...> operator because the core glob splits
## its pattern on whitespace, which breaks when the script is installed in a
## directory whose path contains a space.
use File::Glob () ;

my ( $CONF ) = ( undef ) ;
## Every *.cfg next to the script is parsed; the last one (alphabetical order) wins
foreach my $conf ( File::Glob::bsd_glob("$binPath/*.cfg") ) {
    $CONF = $oConf->as_conf($conf) ;
}
## Without a configuration there is no web service URL to query: stop here with
## a clear message instead of going on with an empty, autovivified hash.
unless (defined $CONF) { croak "No usable .cfg configuration file found in $binPath\n" ; }

## -------------- HTML template file ------------------------ :
## Fixed file name, so no glob expansion is needed
$html_template = "$binPath/golm_out.tmpl" ;
$CONF->{'HTML_TEMPLATE'} = $html_template ;


## -------------- Retrieve values from conf file ------------------------ :
my $ws_url            = $CONF->{'WS_URL'} ;
my $ws_proxy          = $CONF->{'WS_PROXY'} ;
my $default_ri        = $CONF->{'RI'} ;
my $default_ri_window = $CONF->{'RI_WINDOW'} ;
my $default_gc_column = $CONF->{'GC_COLUMN'} ;
my $default_entries   = $CONF->{'DEFAULT_ENTRIES'} ;
my $analyte_ref       = $CONF->{'ANALYTE_REF'} ;
my $metabolite_ref    = $CONF->{'METABOLITE_REF'} ;
my $spectrum_ref      = $CONF->{'SPECTRUM_REF'} ;

############# -------------- Test the Golm web service -------------- ############# :

$oapi->test_query_golm($ws_url, $ws_proxy) ;
112
############# -------------- Parse the input spectra (mass string or .msp file) -------------- ############# :

## References to the m/z values and to the intensities extracted from the input
my $ref_mzs_res ;
my $ref_ints_res ;

## -relative and -maxIons are optional: test definedness first so that a missing
## option does not raise an "uninitialized value" warning in the comparisons.
my $use_relative = ( defined $relative and $relative eq "true" ) ? 1 : 0 ;
my $limit_ions   = ( defined $maxIons  and $maxIons > 0 )        ? 1 : 0 ;

## Both input sources at once used to match no branch at all, so the script went
## on silently without sending a single query: refuse the ambiguity explicitly.
if (defined $inputFile and defined $inputMasses) {
    croak "inputFile and inputMasses are mutually exclusive: please provide only one of them\n" ;
}
## Case when masses are entered manually
elsif (defined $inputMasses) {

    ## Retrieve masses and intensities from the input string
    $ref_mzs_res  = $omsp->get_masses_from_string($inputMasses, $mzRes) ;
    $ref_ints_res = $omsp->get_intensities_from_string($inputMasses) ;

    ## Sorting intensities
    my ($mzs_res_sorted, $ints_res_sorted) = $omsp->sorting_descending_intensities($ref_mzs_res, $ref_ints_res) ;

    #************************
    # Noise threshold: uncomment if it is not managed in MetaMS
    #************************

    ## Apply noise threshold
    #my ($mzs_res_noise_threshold, $ints_res_noise_threshold) = $omsp->keep_ions_above_threshold($mzs_res_sorted, $ints_res_sorted) ;

    #************************

    ## Keep a limited number of ions according to $maxIons
    if ($limit_ions) {

        ## To uncomment if "Apply noise threshold" is used
        #$ref_mzs_res = $omsp->keep_only_max_masses( $mzs_res_noise_threshold, $maxIons ) ;
        #$ref_ints_res = $omsp->keep_only_max_intensities( $ints_res_noise_threshold, $maxIons ) ;

        ## To comment out if "Apply noise threshold" is used
        $ref_mzs_res  = $omsp->keep_only_max_masses( $mzs_res_sorted, $maxIons ) ;
        $ref_ints_res = $omsp->keep_only_max_intensities( $ints_res_sorted, $maxIons ) ;
    }

    ## Remove redundant masses
    ## NOTE(review): the truncated ($ref_*) and de-duplicated ($uniq_*) lists are
    ## computed but the query below is still built from the full sorted lists,
    ## exactly as before. This differs from the msp-file branch and looks
    ## unintended -- confirm against lib::msp before changing it.
    my ($uniq_masses , $uniq_intensities) = $omsp->remove_redundants($ref_mzs_res, $ref_ints_res) ;

    ## Relative intensity: scale against the first (highest) sorted intensity
    my $relative_ints_res = undef ;
    if ($use_relative) {
        my @relative_ints = map { ($_ * 100) / $ints_res_sorted->[0] } @$ints_res_sorted ;
        $relative_ints_res = \@relative_ints ;
    }

    ## Encode spectra
    if (defined $relative_ints_res) {
        $encoded_spectra = $omsp->encode_spectrum_for_query($mzs_res_sorted, $relative_ints_res) ;
    }
    else {
        $encoded_spectra = $omsp->encode_spectrum_for_query($mzs_res_sorted, $ints_res_sorted) ;
    }

}
## Case with the msp file
elsif (defined $inputFile and -e $inputFile and defined $mzRes and defined $maxIons and defined $maxHits) {

    unless (-f $inputFile) { croak "$inputFile is not a file" ; }
    unless (-s $inputFile) { croak "$inputFile is empty" ; }

    ## Get masses and their intensities
    $ref_mzs_res  = $omsp->get_mzs($inputFile, $mzRes) ;
    $ref_ints_res = $omsp->get_intensities($inputFile, $maxIons) ;

    ## Sorting intensities
    my ($mzs_res_sorted, $ints_res_sorted) = $omsp->sorting_descending_intensities($ref_mzs_res, $ref_ints_res) ;

    #************************
    # Noise threshold: uncomment if it is not managed in MetaMS
    #************************

    ## Apply noise threshold if exists
    #my ($mzs_res_noise_threshold, $ints_res_noise_threshold) = $omsp->keep_ions_above_threshold($mzs_res_sorted, $ints_res_sorted) ;

    #************************

    ## Keep only $maxIons ions
    if ($limit_ions) {

        ## To uncomment if "Apply noise threshold" is used
        #$ref_mzs_res = $omsp->keep_only_max_masses( $mzs_res_noise_threshold, $maxIons ) ;
        #$ref_ints_res = $omsp->keep_only_max_intensities( $ints_res_noise_threshold, $maxIons ) ;

        ## To comment out if "Apply noise threshold" is used
        $mzs_res_sorted  = $omsp->keep_only_max_masses( $mzs_res_sorted, $maxIons ) ;
        $ints_res_sorted = $omsp->keep_only_max_intensities( $ints_res_sorted, $maxIons ) ;
    }

    ## Remove redundant masses, one entry of the sorted lists at a time
    my @uniq_total_masses      = () ;
    my @uniq_total_intensities = () ;

    ## Walk both lists in parallel and stop at the end of the shorter one
    my $nb_entries = ( @$mzs_res_sorted < @$ints_res_sorted ) ? scalar @$mzs_res_sorted : scalar @$ints_res_sorted ;

    for my $i ( 0 .. $nb_entries - 1 ) {
        my ($uniq_masses , $uniq_intensities) = $omsp->remove_redundants($mzs_res_sorted->[$i], $ints_res_sorted->[$i]) ;
        push (@uniq_total_masses , $uniq_masses) ;
        push (@uniq_total_intensities, $uniq_intensities) ;
    }

    ## Relative intensity
    my $relative_ints_res = undef ;
    if ($use_relative) {
        $relative_ints_res = $omsp->apply_relative_intensity(\@uniq_total_intensities) ;
    }

    ## Encode spectra
    if (defined $relative_ints_res) {
        $encoded_spectra = $omsp->encode_spectrum_for_query(\@uniq_total_masses, $relative_ints_res) ;
    }
    else {
        $encoded_spectra = $omsp->encode_spectrum_for_query(\@uniq_total_masses, \@uniq_total_intensities) ;
    }

}
elsif (!defined $maxHits or !defined $maxIons or !defined $mzRes) { croak "Parameters mzRes or maxIons or maxHits are undefined\n"; }
elsif (!defined $inputFile) { croak "The input data is not defined (File or mass/intensity list AS string)\n" ; }
elsif (!-f $inputFile) { croak "$inputFile does not exist" ; }
231
############# -------------- Send queries to Golm -------------- ############# :

## One LibrarySearch call per encoded spectrum; each result is appended to
## @hits in the order of the spectra.
my $limited_hits ;
for my $spectrum ( @{ $encoded_spectra } ) {

    ## Arguments in the order expected by LibrarySearch: user retention settings,
    ## the spectrum itself, the hit limit, the five distance thresholds, the web
    ## service access and finally the defaults read from the conf file.
    my @search_args = (
        $ri, $riWindow, $gcColumn,
        $spectrum,
        $maxHits,
        $JaccardDistanceThreshold,
        $s12GowerLegendreDistanceThreshold,
        $DotproductDistanceThreshold,
        $HammingDistanceThreshold,
        $EuclideanDistanceThreshold,
        $ws_url, $ws_proxy,
        $default_ri, $default_ri_window, $default_gc_column,
    ) ;

    ## List-context call: only the first returned element is kept
    ($limited_hits) = $oapi->LibrarySearch(@search_args) ;
    push @hits, $limited_hits ;
}
245
246
############# -------------- Build outputs -------------- ############# :

## Object built from all collected hits; every writer below consumes it
my $jsons_obj = $o_output->build_json_res_object( \@hits ) ;
## JSON export is currently switched off, so -output_json has no effect
#$o_output->write_json_skel(\$json_file, $jsons_obj) ;

# Build the ajax data source for html view
#my $ajax = $o_output->write_ajax_data_source($jsons_obj) ;

## HTML view: table body entries first, then the page written from the template
my @tbody_args    = ( $jsons_obj, $analyte_ref, $metabolite_ref, $spectrum_ref ) ;
my $tbody_entries = $o_output->add_entries_to_tbody_object(@tbody_args) ;

## NOTE(review): $jsons_obj is passed both as first and as last argument --
## confirm against the signature of lib::output::write_html_body.
my @html_args = ( $jsons_obj, $tbody_entries, $html_file, $html_template, $default_entries, $jsons_obj ) ;
$o_output->write_html_body(@html_args) ;

## Spreadsheet and tabular exports
$o_output->excel_like_output( $excel_file, $jsons_obj ) ;
$o_output->write_csv( $csv_file, $jsons_obj ) ;
260
261
#====================================================================================
# Help subroutine called with -h option, with no argument at all, or when no
# input data is given
# number of arguments : 0
# Argument(s)        :
# Return             : never returns -- prints the usage on STDERR and exits with status 1
#====================================================================================
sub help {
    ## Single-quoted heredoc: the text is printed verbatim, so the '@' of the
    ## e-mail addresses needs no escaping. The option names listed here are the
    ## ones actually declared in the GetOptions call of this script.
    print STDERR <<'END_HELP' ;

golm_ws_lib_search.pl

# golm_ws_lib_search.pl is a script to use SOAP Golm webservice and send specific queries about spectra searches.
# Input : a list of masses (m/z) and their intensities.
# Authors : Gabriel Cretin / Franck Giacomoni / Yann Guitton
# Emails : franck.giacomoni@clermont.inra.fr
#          gabriel.cretin@clermont.inra.fr
#          yann.guitton@oniris-nantes.fr
# Version : 1.2
# Created : 03/06/2016
# Updated : 28/11/2016
USAGE :
    golm_ws_lib_search.pl -help OR

    golm_ws_lib_search.pl
        -inputFile [.msp file]
        -inputMasses [masses + intensities of an ion: 'mz1 int1 mz2 int2 mzx intx...']
        -ri [Retention Index: float or integer]
        -riWindow [Retention Index Window: 1500 or the value of your choice]
        -gcColumn [AlkaneRetentionIndexGcColumnComposition: 'VAR5' or 'MDN35' or 'None']
        -maxHits [Maximum hits per queried spectra: integer >= 1 (100 for all of them)]
        -mzRes [Number of digits after the decimal point for m/z values: integer (0 if none)]
        -maxIons [Number of m/z per spectra you want to keep for the queries, default 0 for all detected ions]
        -noiseThreshold [Ions having intensity values less than this value are ignored -- currently disabled in this script]
        -JaccardDistanceThreshold...............[
        -s12GowerLegendreDistanceThreshold......[ Threshold for each score. Hits with greater scores are ignored: 0 (perfect match) < threshold <= 1 (mismatch) ]
        -DotproductDistanceThreshold............[
        -EuclideanDistanceThreshold.............[
        -HammingDistanceThreshold[Threshold for hamming score. Hits with greater scores are ignored: 0 - perfect match to higher values indicating a mismatch]
        -relative [Transforms absolute intensities in the msp file into relative intensities: (intensity * 100)/ max(intensities), otherwise, leave them absolute: true or false]
        -output_xls [name of the xls file in output: string]
        -output_html [name of the html file in output: string]
        -output_json [name of the json file in output: string]
        -output_tabular [name of the csv file in output: string]

END_HELP
    exit(1) ;
}
308
309 ## END of script
310
311 __END__
312
313 =head1 NAME
314
315 golm_ws_lib_search.pl -- script to send GC-MS spectra queries to Golm Metabolome Database (GMD)
316
317 =head1 USAGE
318
319 golm_ws_lib_search.pl -help OR
320
321 golm_ws_lib_search.pl
322 -inputFile [.msp file]
323 -inputMasses [masses + intensities of an ion: 'mz1 int1 mz2 int2 mzx intx...']
324 -ri [Retention Index: float or integer]
325 -riWindow [Retention Index Window: 1500 or the value of your choice]
326 -gcColumn [AlkaneRetentionIndexGcColumnComposition: 'VAR5' or 'MDN35' or 'None']
327 -maxHits [Maximum hits per queried spectra: integer >= 1 (100 for all of them)]
328 -mzRes [Number of digits after the decimal point for m/z values: integer (0 if none)]
329 -maxIons [Number of m/z per spectra you want to keep for the queries, default 0 for all detected ions]
330 -noiseThreshold [Ions having intensity values less than this value are ignored -- currently disabled: the option is commented out in the script]
331 -JaccardDistanceThreshold...............[
332 -s12GowerLegendreDistanceThreshold......[ Threshold for each score. Hits with greater scores are ignored: 0 (perfect match) < threshold <= 1 (mismatch) ]
333 -DotproductDistanceThreshold............[
334 -EuclideanDistanceThreshold.............[
335 -HammingDistanceThreshold[Threshold for hamming score. Hits with greater scores are ignored: 0 - perfect match to higher values indicating a mismatch]
336 -relative [Transforms absolute intensities in the msp file into relative intensities: (intensity * 100)/ max(intensities), otherwise, leave them absolute: true or false]
337 -output_xls [name of the xls file in output: string]
338 -output_html [name of the html file in output: string]
339 -output_json [name of the json file in output: string]
340 -output_tabular [name of the csv file in output: string]
341
342 =head1 SYNOPSIS
343
344 This script sends GC-MS EI spectra from an msp file given in argument to Golm Database, and presents results on a web interface.
345
346 =head1 DESCRIPTION
347
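348 This main program reads GC-MS spectra either from an msp file (-inputFile) or from a mass/intensity string (-inputMasses), encodes each spectrum, sends one library search query per spectrum to the Golm web service, and writes the returned hits as HTML, XLS and tabular (CSV) outputs.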
349
350 =over 4
351
352 =item B<function01>
353
354 =item B<function02>
355
356 =back
357
358 =head1 AUTHOR
359
360 Gabriel Cretin E<lt>gabriel.cretin@clermont.inra.frE<gt>
361 Franck Giacomoni E<lt>franck.giacomoni@clermont.inra.frE<gt>
362
363 =head1 LICENSE
364
365 This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
366
367 =head1 VERSION
368
369 version 1.0 : 03 / 06 / 2016
370
371 version 1.1 : 24 / 06 / 2016
372
373 version 1.2 : 28 / 11 / 2016
374
375 =cut