Mercurial > repos > fgiacomoni > golm_ws_lib_search
comparison golm_ws_lib_search.pl @ 0:e3d43b8c987b draft
Init repository with last tool-bank-golm-lib_search master version
author | fgiacomoni |
---|---|
date | Mon, 05 Dec 2016 08:32:04 -0500 |
parents | |
children | 11779b6402bc |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e3d43b8c987b |
---|---|
1 #!perl | |
2 | |
3 ## script : XXX.pl | |
4 #============================================================================= | |
5 # Included modules and versions | |
6 #============================================================================= | |
7 ## Perl modules | |
8 use strict ; | |
9 use warnings ; | |
10 use Carp qw (cluck croak carp) ; | |
11 | |
12 use Data::Dumper ; | |
13 use Getopt::Long ; | |
14 use FindBin ; ## Allows you to locate the directory of original perl script | |
15 | |
16 ## Specific Perl Modules (PFEM) | |
17 use lib $FindBin::Bin ; | |
18 my $binPath = $FindBin::Bin ; | |
19 use JSON ; | |
20 | |
21 ## Dedicate Perl Modules PFEM | |
22 use lib::golm_ws_api qw( :ALL ) ; | |
23 use lib::msp qw( :ALL ) ; | |
24 use lib::output qw( :ALL ) ; | |
25 use lib::conf qw( :ALL ) ; | |
26 | |
27 ## Option holders : every variable stays undefined until GetOptions fills it | |
28 my ($OptHelp,$ri,$riWindow,$gcColumn,$inputFile,$inputMasses) ; | |
29 my ($maxHits,$mzRes,$maxIons,$threshold,$relative,$noise_threshold) ; | |
30 my ($JaccardDistanceThreshold,$s12GowerLegendreDistanceThreshold) ; | |
31 my ($DotproductDistanceThreshold,$HammingDistanceThreshold,$EuclideanDistanceThreshold) ; | |
32 my ($excel_file,$html_file,$html_template,$json_file,$csv_file) ; | |
33 my (@hits, @ojson) ; | |
34 my $encoded_spectra ; | |
35 | |
36 ## Without any argument on the command line, print the usage and stop | |
37 if (!@ARGV){ help() ; } | |
38 | |
39 #============================================================================= | |
40 # Manage EXCEPTIONS | |
41 #============================================================================= | |
42 GetOptions ( | |
43 "help|h" => \$OptHelp, # HELP | |
44 "inputFile:s" => \$inputFile, | |
45 "inputMasses:s" => \$inputMasses, | |
46 "ri:i" => \$ri, | |
47 "riWindow:i" => \$riWindow, | |
48 "gcColumn:s" => \$gcColumn, | |
49 "maxHits:i" => \$maxHits, | |
50 "mzRes:i" => \$mzRes, | |
51 "maxIons:i" => \$maxIons, | |
52 #"noiseThreshold:f" => \$noise_threshold, | |
53 "JaccardDistanceThreshold:f" => \$JaccardDistanceThreshold, | |
54 "s12GowerLegendreDistanceThreshold:f" => \$s12GowerLegendreDistanceThreshold, | |
55 "DotproductDistanceThreshold:f" => \$DotproductDistanceThreshold, | |
56 "HammingDistanceThreshold:f" => \$HammingDistanceThreshold, | |
57 "EuclideanDistanceThreshold:f" => \$EuclideanDistanceThreshold, | |
58 "relative:s" => \$relative, | |
59 "output_xls:s" => \$excel_file, | |
60 "output_html:s" => \$html_file, | |
61 "output_json:s" => \$json_file, | |
62 "output_tabular:s" => \$csv_file, | |
63 ) ; | |
64 | |
65 ## -help / -h is honoured before any value check, so that asking for help never ends on a validation error | |
66 if (defined($OptHelp)){ help() ; } | |
67 ## Only the values actually given are range-checked : an undefined option is left untouched (undef is no longer compared with 0) | |
68 die "maxHits must be >= 0\n" if (defined($maxHits) and $maxHits < 0) ; | |
69 die "mzRes must be >= 0 \n" if (defined($mzRes) and $mzRes < 0) ; | |
70 die "maxIons must be >= 0\n" if (defined($maxIons) and $maxIons < 0) ; | |
71 #die "noiseThreshold must be > 0\n" unless ($noise_threshold > 0) ; | |
72 | |
73 if( (!defined ($inputFile)) and (!defined($inputMasses) )){ warn "The input data is not defined (File or mass/intensity list AS string)\n" ; help() ; } | |
74 | |
75 #============================================================================= | |
76 # MAIN SCRIPT | |
77 #============================================================================= | |
78 | |
79 ## Create the module objects used by the rest of the script ### | |
80 | |
81 my $oapi = lib::golm_ws_api->new() ; | |
82 my $omsp = lib::msp->new() ; | |
83 my $o_output = lib::output->new() ; | |
84 my $oConf = lib::conf->new() ; | |
85 | |
86 ## -------------- Conf file ------------------------ : | |
87 ## Every .cfg file found next to the script is parsed; when several exist, the last one parsed is kept | |
88 ## NOTE(review): glob splits its pattern on whitespace, so a script directory containing spaces would not be searched correctly | |
89 my $CONF ; | |
90 foreach my $conf_path ( glob("$binPath/*.cfg") ) { | |
91 $CONF = $oConf->as_conf($conf_path) ; | |
92 } | |
93 ## Without configuration the web service URL is unknown : stop here with an explicit message | |
94 croak "No configuration (.cfg) could be loaded from $binPath\n" unless (defined $CONF) ; | |
95 | |
96 ## -------------- HTML template file ------------------------ : | |
97 ## Plain path instead of a scalar-context glob : the file name holds no wildcard and a glob would split a path containing spaces | |
98 $html_template = "$binPath/golm_out.tmpl" ; | |
99 $CONF->{'HTML_TEMPLATE'} = $html_template ; | |
100 | |
101 ## -------------- Retrieve values from conf file ------------------------ : | |
102 my ($ws_url, $ws_proxy) = @{$CONF}{ qw(WS_URL WS_PROXY) } ; | |
103 my ($default_ri, $default_ri_window, $default_gc_column) = @{$CONF}{ qw(RI RI_WINDOW GC_COLUMN) } ; | |
104 my $default_entries = $CONF->{'DEFAULT_ENTRIES'} ; | |
105 my ($analyte_ref, $metabolite_ref, $spectrum_ref) = @{$CONF}{ qw(ANALYTE_REF METABOLITE_REF SPECTRUM_REF) } ; | |
106 | |
107 ############# -------------- Test the Golm web service -------------- ############# : | |
108 | |
109 ## Called with the configured URL and proxy; any value it returns is not used here | |
110 $oapi->test_query_golm($ws_url, $ws_proxy) ; | |
111 | |
112 | |
113 ############# -------------- Parse the input spectra (mass string or .msp file) -------------- ############# : | |
114 | |
115 ## Masses and intensities as returned by the msp module | |
116 my $ref_mzs_res ; | |
117 my $ref_ints_res ; | |
118 | |
119 ## An undefined -relative is handled as "false" : the string comparison never sees undef | |
120 my $use_relative = ( defined $relative and $relative eq "true" ) ? 1 : 0 ; | |
121 | |
122 ## Case 1 : masses and intensities entered manually as a string (-inputMasses) | |
123 if (defined $inputMasses && !defined $inputFile) { | |
124 | |
125 ## Retrieve masses from the string | |
126 $ref_mzs_res = $omsp->get_masses_from_string($inputMasses, $mzRes) ; | |
127 | |
128 ## Retrieve intensities from the string | |
129 $ref_ints_res = $omsp->get_intensities_from_string($inputMasses) ; | |
130 | |
131 ## Sorting intensities | |
132 my ($mzs_res_sorted, $ints_res_sorted) = $omsp->sorting_descending_intensities($ref_mzs_res, $ref_ints_res) ; | |
133 | |
134 #************************ | |
135 # Noise threshold: uncomment if it is not managed in MetaMS | |
136 #************************ | |
137 #my ($mzs_res_noise_threshold, $ints_res_noise_threshold) = $omsp->keep_ions_above_threshold($mzs_res_sorted, $ints_res_sorted) ; | |
138 #************************ | |
139 | |
140 ## Keep a limited number of ions according to $maxIons (skipped when $maxIons is undefined or 0) | |
141 ## NOTE(review): in this branch the trimmed lists and the de-duplicated lists below are never given to encode_spectrum_for_query, unlike the .msp branch -- confirm whether this is intended | |
142 if (defined $maxIons and $maxIons > 0) { | |
143 | |
144 ## To use instead of the two active lines below when the noise threshold is applied | |
145 #$ref_mzs_res = $omsp->keep_only_max_masses( $mzs_res_noise_threshold, $maxIons ) ; | |
146 #$ref_ints_res = $omsp->keep_only_max_intensities( $ints_res_noise_threshold, $maxIons ) ; | |
147 | |
148 ## Active code : noise threshold not applied | |
149 $ref_mzs_res = $omsp->keep_only_max_masses( $mzs_res_sorted, $maxIons ) ; | |
150 $ref_ints_res = $omsp->keep_only_max_intensities( $ints_res_sorted, $maxIons ) ; | |
151 } | |
152 | |
153 ## Remove redundant masses | |
154 my ($uniq_masses , $uniq_intensities) = $omsp->remove_redundants($ref_mzs_res, $ref_ints_res) ; | |
155 | |
156 ## Relative intensity : (intensity * 100) / first intensity of the sorted list (presumably the highest one -- depends on sorting_descending_intensities) | |
157 my $relative_ints_res ; | |
158 if ($use_relative) { | |
159 my $first_intensity = $ints_res_sorted->[0] ; | |
160 $relative_ints_res = [ map { ($_ * 100)/$first_intensity } @$ints_res_sorted ] ; | |
161 } | |
162 | |
163 ## Encode spectra : relative intensities when they were computed, raw ones otherwise | |
164 if (defined $relative_ints_res) { | |
165 $encoded_spectra = $omsp->encode_spectrum_for_query($mzs_res_sorted, $relative_ints_res) ; | |
166 } | |
167 else { $encoded_spectra = $omsp->encode_spectrum_for_query($mzs_res_sorted, $ints_res_sorted) ; } | |
168 | |
169 } | |
170 ## Case 2 : spectra read from an .msp file (-inputFile), all numeric parameters being defined | |
171 elsif (defined $inputFile and -e $inputFile and !defined $inputMasses and defined $mzRes and defined $maxIons and defined $maxHits) { | |
172 | |
173 unless (-f $inputFile) { croak "$inputFile is not a file" ; } | |
174 unless (-s $inputFile) { croak "$inputFile is empty" ; } | |
175 | |
176 ## Get masses and their intensities | |
177 $ref_mzs_res = $omsp->get_mzs($inputFile, $mzRes) ; | |
178 $ref_ints_res = $omsp->get_intensities($inputFile, $maxIons) ; | |
179 | |
180 ## Sorting intensities | |
181 my ($mzs_res_sorted, $ints_res_sorted) = $omsp->sorting_descending_intensities($ref_mzs_res, $ref_ints_res) ; | |
182 | |
183 #************************ | |
184 # Noise threshold: uncomment if it is not managed in MetaMS | |
185 #************************ | |
186 #my ($mzs_res_noise_threshold, $ints_res_noise_threshold) = $omsp->keep_ions_above_threshold($mzs_res_sorted, $ints_res_sorted) ; | |
187 #************************ | |
188 | |
189 ## Keep only $maxIons ions | |
190 if ($maxIons > 0) { | |
191 | |
192 ## To use instead of the two active lines below when the noise threshold is applied | |
193 #$ref_mzs_res = $omsp->keep_only_max_masses( $mzs_res_noise_threshold, $maxIons ) ; | |
194 #$ref_ints_res = $omsp->keep_only_max_intensities( $ints_res_noise_threshold, $maxIons ) ; | |
195 | |
196 ## Active code : noise threshold not applied | |
197 $mzs_res_sorted = $omsp->keep_only_max_masses( $mzs_res_sorted, $maxIons ) ; | |
198 $ints_res_sorted = $omsp->keep_only_max_intensities( $ints_res_sorted, $maxIons ) ; | |
199 } | |
200 | |
201 ## Remove redundant masses, one element of the sorted lists at a time (stops at the end of the shorter list) | |
202 my @uniq_total_masses ; | |
203 my @uniq_total_intensities ; | |
204 my $last_index = ( @$mzs_res_sorted < @$ints_res_sorted ) ? $#{$mzs_res_sorted} : $#{$ints_res_sorted} ; | |
205 | |
206 foreach my $i ( 0 .. $last_index ) { | |
207 my ($uniq_masses , $uniq_intensities) = $omsp->remove_redundants($mzs_res_sorted->[$i], $ints_res_sorted->[$i]) ; | |
208 push (@uniq_total_masses , $uniq_masses) ; | |
209 push (@uniq_total_intensities, $uniq_intensities) ; | |
210 } | |
211 | |
212 ## Relative intensity | |
213 my $relative_ints_res ; | |
214 if ($use_relative) { | |
215 $relative_ints_res = $omsp->apply_relative_intensity(\@uniq_total_intensities) ; | |
216 } | |
217 | |
218 ## Encode spectra : relative intensities when they were computed, raw ones otherwise | |
219 if (defined $relative_ints_res) { | |
220 $encoded_spectra = $omsp->encode_spectrum_for_query(\@uniq_total_masses, $relative_ints_res) ; | |
221 } | |
222 else { $encoded_spectra = $omsp->encode_spectrum_for_query(\@uniq_total_masses, \@uniq_total_intensities) ; } | |
223 | |
224 } | |
225 ## A mandatory numeric parameter is missing | |
226 elsif (!defined $maxHits or !defined $maxIons or !defined $mzRes) { croak "Parameters mzRes or maxIons or maxHits are undefined\n"; } | |
227 ## -inputFile does not designate a regular file | |
228 elsif (!-f $inputFile) { croak "$inputFile does not exist" ; } | |
229 ## Remaining case : -inputFile and -inputMasses were both given, which used to end silently with empty outputs | |
230 else { croak "inputFile and inputMasses are both defined: only one kind of input must be given\n" ; } | |
231 | |
232 ############# -------------- Send queries to Golm -------------- ############# : | |
233 | |
234 ## One LibrarySearch call per encoded spectrum : the first value returned by each call is appended to @hits, in query order | |
235 my $limited_hits ; | |
236 for my $spectrum ( @{$encoded_spectra} ) { | |
237 ($limited_hits) = $oapi->LibrarySearch ( | |
238 $ri, $riWindow, $gcColumn, $spectrum, $maxHits, | |
239 $JaccardDistanceThreshold, $s12GowerLegendreDistanceThreshold, $DotproductDistanceThreshold, | |
240 $HammingDistanceThreshold, $EuclideanDistanceThreshold, | |
241 $ws_url, $ws_proxy, | |
242 $default_ri, $default_ri_window, $default_gc_column, | |
243 ) ; | |
244 push @hits, $limited_hits ; | |
245 } | |
246 | |
247 ############# -------------- Build outputs -------------- ############# : | |
248 | |
249 ## Object built from all the hits; it is handed to every output writer below | |
250 my $jsons_obj = $o_output->build_json_res_object( \@hits ) ; | |
251 #$o_output->write_json_skel(\$json_file, $jsons_obj) ; | |
252 | |
253 # Build the ajax data source for html view | |
254 #my $ajax = $o_output->write_ajax_data_source($jsons_obj) ; | |
255 | |
256 my $tbody_entries = $o_output->add_entries_to_tbody_object( $jsons_obj, $analyte_ref, $metabolite_ref, $spectrum_ref ) ; | |
257 $o_output->write_html_body( $jsons_obj, $tbody_entries, $html_file, $html_template, $default_entries, $jsons_obj ) ; | |
258 $o_output->excel_like_output( $excel_file, $jsons_obj ) ; | |
259 $o_output->write_csv( $csv_file, $jsons_obj ) ; | |
260 | |
261 | |
262 #==================================================================================== | |
263 # Help subroutine : prints the usage on STDERR and stops the script (called for -h/-help, for an empty command line or when no input is given) | |
264 # number of arguments : 0 | |
265 # Argument(s) : | |
266 # Return : never returns (exit status 1) | |
267 #==================================================================================== | |
268 sub help { | |
269 print STDERR " | |
270 golm_ws_lib_search.pl | |
271 | |
272 # golm_ws_lib_search.pl is a script to use SOAP Golm webservice and send specific queries about spectra searches. | |
273 # Input : a list of masses (m/z) and their intensities. | |
274 # Authors : Gabriel Cretin / Franck Giacomoni / Yann Guitton | |
275 # Emails : franck.giacomoni\@clermont.inra.fr | |
276 # gabriel.cretin\@clermont.inra.fr | |
277 # yann.guitton\@oniris-nantes.fr | |
278 # Version : 1.2 | |
279 # Created : 03/06/2016 | |
280 # Updated : 28/11/2016 | |
281 USAGE : | |
282 golm_ws_lib_search.pl -help OR | |
283 | |
284 golm_ws_lib_search.pl | |
285 -inputFile [.msp file] | |
286 -inputMasses [masses + intensities of an ion: 'mz1 int1 mz2 int2 mzx intx...'] | |
287 -ri [Retention Index: integer] | |
288 -riWindow [Retention Index Window: 1500 or the value of your choice] | |
289 -gcColumn [AlkaneRetentionIndexGcColumnComposition: 'VAR5' or 'MDN35' or 'None'] | |
290 -maxHits [Maximum hits per queried spectra: integer >= 1 (100 for all of them)] | |
291 -mzRes [Number of digits after the decimal point for m/z values: integer (0 if none)] | |
292 -maxIons [Number of m/z per spectra you want to keep for the queries, default 0 for all detected ions] | |
293 -noiseThreshold [Currently disabled in this version: ions having intensity values less than this value would be ignored] | |
294 -JaccardDistanceThreshold...............[ | |
295 -s12GowerLegendreDistanceThreshold......[ Threshold for each score. Hits with greater scores are ignored: 0 (perfect match) < threshold <= 1 (mismatch) ] | |
296 -DotproductDistanceThreshold............[ | |
297 -EuclideanDistanceThreshold.............[ | |
298 -HammingDistanceThreshold[Threshold for hamming score. Hits with greater scores are ignored: 0 - perfect match to higher values indicating a mismatch] | |
299 -relative [Transforms absolute intensities in the msp file into relative intensities: (intensity * 100)/ max(intensities), otherwise, leave them absolute: true or false] | |
300 -output_xls [name of the xls file in output: string] | |
301 -output_html [name of the html file in output: string] | |
302 -output_json [name of the json file in output: string] | |
303 -output_tabular [name of the csv file in output: string] | |
304 | |
305 "; | |
306 exit(1); | |
307 } | |
308 | |
309 ## END of script | |
310 | |
311 __END__ | |
312 | |
313 =head1 NAME | |
314 | |
315 golm_ws_lib_search.pl -- script to send GC-MS spectra queries to Golm Metabolome Database (GMD) | |
316 | |
317 =head1 USAGE | |
318 | |
319 golm_ws_lib_search.pl -help OR | |
320 | |
321 golm_ws_lib_search.pl | |
322 -inputFile [.msp file] | |
323 -inputMasses [masses + intensities of an ion: 'mz1 int1 mz2 int2 mzx intx...'] | |
324 -ri [Retention Index: integer] | |
325 -riWindow [Retention Index Window: 1500 or the value of your choice] | |
326 -gcColumn [AlkaneRetentionIndexGcColumnComposition: 'VAR5' or 'MDN35' or 'None'] | |
327 -maxHits [Maximum hits per queried spectra: integer >= 1 (100 for all of them)] | |
328 -mzRes [Number of digits after the decimal point for m/z values: integer (0 if none)] | |
329 -maxIons [Number of m/z per spectra you want to keep for the queries, default 0 for all detected ions] | |
330 -noiseThreshold [Currently disabled in this version: ions having intensity values less than this value would be ignored] | |
331 -JaccardDistanceThreshold...............[ | |
332 -s12GowerLegendreDistanceThreshold......[ Threshold for each score. Hits with greater scores are ignored: 0 (perfect match) < threshold <= 1 (mismatch) ] | |
333 -DotproductDistanceThreshold............[ | |
334 -EuclideanDistanceThreshold.............[ | |
335 -HammingDistanceThreshold[Threshold for hamming score. Hits with greater scores are ignored: 0 - perfect match to higher values indicating a mismatch] | |
336 -relative [Transforms absolute intensities in the msp file into relative intensities: (intensity * 100)/ max(intensities), otherwise, leave them absolute: true or false] | |
337 -output_xls [name of the xls file in output: string] | |
338 -output_html [name of the html file in output: string] | |
339 -output_json [name of the json file in output: string] | |
340 -output_tabular [name of the csv file in output: string] | |
341 | |
342 =head1 SYNOPSIS | |
343 | |
344 This script sends GC-MS EI spectra from an msp file given in argument to Golm Database, and presents results on a web interface. | |
345 | |
346 =head1 DESCRIPTION | |
347 | |
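348 This main program is a command-line client for the library search web service of the Golm Metabolome Database: it reads spectra from an .msp file or from a string of masses and intensities, sends one query per spectrum, and writes the hits as HTML, XLS and tabular files. | |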
349 | |
350 =over 4 | |
351 | |
352 =item B<function01> | |
353 | |
354 =item B<function02> | |
355 | |
356 =back | |
357 | |
358 =head1 AUTHOR | |
359 | |
360 Gabriel Cretin E<lt>gabriel.cretin@clermont.inra.frE<gt> | |
361 Franck Giacomoni E<lt>franck.giacomoni@clermont.inra.frE<gt> | |
362 | |
363 =head1 LICENSE | |
364 | |
365 This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. | |
366 | |
367 =head1 VERSION | |
368 | |
369 version 1.0 : 03 / 06 / 2016 | |
370 | |
371 version 1.1 : 24 / 06 / 2016 | |
372 | |
373 version 1.2 : 28 / 11 / 2016 | |
374 | |
375 =cut |