Mercurial > repos > fgiacomoni > lipidmaps_textsearch
comparison wsdl_lipidmaps.pl @ 0:e8bd49794291 draft
Init repository with last lipidmaps_textsearch master version
author | fgiacomoni |
---|---|
date | Tue, 11 Apr 2017 03:47:06 -0400 |
parents | |
children | 1276908e8fc4 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e8bd49794291 |
---|---|
1 #! perl | |
2 | |
3 use strict ; | |
4 no strict "refs" ; | |
5 use warnings ; | |
6 use Carp qw (cluck croak carp confess) ; | |
7 use Exporter ; | |
8 use diagnostics ; | |
9 | |
10 use Data::Dumper ; | |
11 use POSIX ; | |
12 use XML::Twig; | |
13 use Getopt::Long ; | |
14 use Time::HiRes; | |
15 | |
16 | |
17 ## Locates the directory that contains this perl script | |
18 use FindBin; | |
19 ## sets where the .pm and .conf files are looked for | |
20 use lib $FindBin::Bin ; | |
21 #my $libPath = $FindBin::Bin."/lib"; | |
22 my $binPath = $FindBin::Bin ; | |
23 | |
24 ## dedicated lib | |
25 | |
26 use lib::lipidmaps ; | |
27 use lib::parser ; | |
28 use lib::writer ; | |
29 # more inra lib | |
30 use lib::conf qw( :ALL ) ; | |
31 use lib::csv qw( :ALL ) ; | |
32 use lib::operations qw( :ALL ) ; | |
33 | |
34 ## Initialized values : script version and holders for the command-line options (all start undefined) | |
35 # NOTE: $col_rt is declared here but its option line (colrt) is commented out in the GetOptions call | |
36 my $version = '1.2'; | |
37 my ( $help, $input_file, $line_header, $col_mass, $col_rt, $decimal, $round_type, $delta, $mode ) = ( undef, undef, undef, undef, undef, undef, undef, undef, undef ) ; | |
38 my ( $list_oxidation, $list_neutral_loss ) = ( undef, undef ) ; | |
39 my ( $col_classif_id, $selected_cat, $selected_cl, $selected_subcl ) = ( undef, undef, undef, undef ) ; | |
40 my ( $output_csv_file, $output_html_file ) = ( undef, undef ) ; | |
41 | |
42 ## Verbose levels (1 OR 3) -- the [INFO] messages of this script are printed only when the value is 3 | |
43 my $verbose = 3 ; | |
44 | |
45 | |
46 &GetOptions ( "help|h" => \$help, # HELP : print the usage message | |
47 "input|i:s" => \$input_file, # path of the input file (parsed as tab-separated CSV) -- Mandatory | |
48 "lineheader:i" => \$line_header, ## number of header lines in the tabular file (a header is assumed when > 0) | |
49 "colmass:i" => \$col_mass, # Input file column containing the masses to query -- Mandatory | |
50 # "colrt:i" => \$col_rt, # Input file Column containing Retention time | |
51 "decimal:i" => \$decimal , # Number of significant decimals kept on masses -- Mandatory | |
52 "listoxidation:s" => \$list_oxidation, ## option : list of oxidations to handle on the experimental masses | |
53 "listneutralloss:s" => \$list_neutral_loss, ## option : list of neutral losses to handle on the experimental masses | |
54 "round:s" => \$round_type, # Rounding type, 'truncation' or 'round' -- Mandatory | |
55 "delta:f" => \$delta, # mass delta (tolerance) sent with the query -- Mandatory | |
56 "cat:s" => \$selected_cat, # Number corresponding to the main category in LIPIDMAPS -- Optional | |
57 "class:s" => \$selected_cl, # Number corresponding to the main class in LIPIDMAPS -- Optional | |
58 "subclass:s" => \$selected_subcl, # Number corresponding to the sub class in LIPIDMAPS -- Optional | |
59 "output:s" => \$output_csv_file, # File+Path for the results (CSV) -- Mandatory | |
60 "view:s" => \$output_html_file, # File+Path for the view results (HTML) -- the HTML file is written only when this option is given | |
61 "colclassif:i" => \$col_classif_id, # Input file column containing LM class IDs for the query -- Optional | |
62 "mode:s" => \$mode, # mode of the initial data : 'POS' or 'NEG' triggers the mass conversion, any other value leaves masses unchanged | |
63 ) ; | |
64 | |
65 #============================================================================= | |
66 # EXCEPTIONS : with -help, print the usage and stop before any file or web-service work is attempted | |
67 #============================================================================= | |
68 if ( $help ) { help() ; exit(0) ; } | |
69 | |
70 ## --------------- Global parameters ---------------- : | |
71 my $nb_pages_for_html_out = 1 ; | |
72 | |
73 ## Conf file : every *.conf found next to the script is read; when several exist, the last one read replaces the others | |
74 my ( $CONF, %RULES, %RECIPES, %TRANSFO ) = ( undef, (), (), () ) ; | |
75 foreach my $conf ( <$binPath/*.conf> ) { | |
76 my $oConf = lib::conf::new() ; | |
77 $CONF = $oConf->as_conf($conf) ; | |
78 } | |
79 | |
80 ## -------------- HTML template file (a *.tmpl next to the script, stored under the HTML_TEMPLATE key) ------------------------ : | |
81 foreach my $html_template ( <$binPath/*.tmpl> ) { $CONF->{'HTML_TEMPLATE'} = $html_template ; } | |
82 | |
83 ## Split the configuration by key prefix : | |
84 ## keys starting with RULE, RECIPE and ANNOT are copied to %RULES, %RECIPES and %TRANSFO : | |
85 foreach (keys (%$CONF)) { | |
86 if( $_ =~/^RULE/ ) { $RULES{$_} = $CONF->{$_} ; } ## rules for clustering | |
87 elsif( $_ =~/^RECIPE/ ) { $RECIPES{$_} = $CONF->{$_} ; } ## fields retrieved with each rule | |
88 elsif( $_ =~/^ANNOT/ ) { $TRANSFO{$_} = $CONF->{$_} ; } ## Transformation annotation in output files | |
89 } | |
90 | |
91 ## Init var : containers filled by the parsing, query and writing steps of this script | |
92 my ( $init_csv_rows, $init_mzs, $init_rts, $classif_ids, $round_init_mzs ) = ( undef, undef, undef, undef, undef ) ; | |
93 my ( @ox_or_loss_names, @ox_or_loss_values, @transfo_init_mzs, @transfo_annotations, @transfo_init_mz_queries, @transfo_init_mz_results, @entries_results, @clusters_results, @entries_total_nb ) = ( (), (), (), (), (), (), (), (), () ) ; | |
94 my ( $ox_names, $ox_values, $loss_names, $loss_values ) = ( [], [], [], [] ) ; | |
95 my ( $is_header, $tbody_object) = (undef, undef) ; | |
96 | |
97 | |
98 print "-----------**********START of MAIN LIPIDMAPS -- version $version *********-------------\n" if ($verbose == 3); | |
99 | |
100 #### --------------------------------- 01 :: Prepare all and Parsing steps on inputs ------------------------------------- | |
101 | |
102 ## Open the input CSV (tab-separated) file / extract and transform masses; the script aborts when the file is missing | |
103 if ( ( defined $input_file ) and ( -e $input_file ) ) { | |
104 print "\n[INFO] Open input file and get values...\n" if ($verbose == 3); | |
105 ## parse the whole csv now : its rows are reused later to build the output csv | |
106 my $ocsv_input = lib::csv->new() ; | |
107 my $csv = $ocsv_input->get_csv_object( "\t" ) ; | |
108 $init_csv_rows = $ocsv_input->parse_csv_object($csv, \$input_file) ; | |
109 | |
110 ## a header is considered present as soon as -lineheader is greater than 0 | |
111 if ( ( defined $line_header ) and ( $line_header > 0 ) ) { $is_header = 'yes' ; } | |
112 | |
113 ## parse masses from the -colmass column | |
114 if ( defined $col_mass ) { | |
115 print "[INFO] Get masses from input file $input_file ...\n" if ($verbose == 3); | |
116 # print "[INFO] Get RT from input file $input_file ...\n" if ($verbose == 3); | |
117 my $ocsv = lib::csv->new() ; | |
118 my $csv = $ocsv->get_csv_object( "\t" ) ; | |
119 $init_mzs = $ocsv->get_value_from_csv_multi_header( $csv, $input_file, $col_mass, $is_header, $line_header ) ; ## retrieve mz values on csv | |
120 # $init_rts = $ocsv->get_value_from_csv_multi_header( $csv, $input_file, $col_rt, $is_header, $line_header ) ; ## retrieve rt values on csv | |
121 } | |
122 | |
123 ## Adjust the mz to the instrument mode (POS/NEG); the numeric constants passed to manage_mode look like charge, electron mass and hydrogen mass -- TODO confirm | |
124 if ( ( defined $mode ) and ( ($mode eq 'POS') or ($mode eq 'NEG') ) ) { | |
125 print "\t [INFO] Apply mass mode transforming (POS to NEU or NEG to NEU) ...\n" if ($verbose == 3); | |
126 my @mode_init_mzs = () ; | |
127 my $omode = lib::operations::new() ; | |
128 foreach my $mz (@$init_mzs) { | |
129 push (@mode_init_mzs, ${$omode->manage_mode(\$mode, \1, \0.0005486, \1.007825, \$mz)} ) ; | |
130 } | |
131 ## the converted list replaces the initial one only when both have the same number of values | |
132 if ( (scalar @$init_mzs) == (scalar @mode_init_mzs) ) { | |
133 $init_mzs = \@mode_init_mzs ; | |
134 } | |
135 else { | |
136 carp "[ERROR] The mode managing process failed and init mzs have been corrompted\n" | |
137 } | |
138 } | |
139 else { | |
140 print "\t [INFO] Apply no mass mode transforming\n" if ($verbose == 3); | |
141 } | |
142 | |
143 ## round masses ('truncation' or 'round'); any other round type aborts the script | |
144 if ( ( defined $round_type ) and ( defined $decimal ) ) { | |
145 print "\t [INFO] Apply mass rounding ...\n" if ($verbose == 3); | |
146 my $oround = lib::operations::new() ; | |
147 if ( $round_type eq 'truncation' ) { $round_init_mzs = $oround->truncate_nums( $init_mzs, $decimal ) ; } | |
148 elsif ( $round_type eq 'round' ) { $round_init_mzs = $oround->round_nums( $init_mzs, $decimal ) ; } | |
149 else { croak "The selected option for data transformation is unknown !\n" ; } | |
150 } | |
151 ## parse classif ids -- optional | |
152 if ( defined $col_classif_id ) { | |
153 print "\t [INFO] Get LM classification IDS from input file $input_file ...\n" if ($verbose == 3); | |
154 my $ocsv = lib::csv::new() ; | |
155 my $csv = $ocsv->get_csv_object( "\t" ) ; | |
156 $classif_ids = $ocsv->get_value_from_csv( $csv, $input_file, $col_classif_id, $is_header, $line_header ) ; | |
157 } | |
158 | |
159 | |
160 | |
161 ## Uses N mz and their entries per page (see config file). | |
162 # how many pages are needed for the input mz list? (this early computation is disabled) | |
163 # $nb_pages_for_html_out = ceil( scalar(@{$init_mzs} ) / $CONF->{HTML_ENTRIES_PER_PAGE} ) ; | |
164 # print "[INFO] Your analysis will generate $nb_pages_for_html_out pages of results...\n" if ($verbose == 3); | |
165 | |
166 } | |
167 else { | |
168 print "[ERROR] Can't find any input file $input_file\n" if ($verbose == 3); | |
169 croak "Can't find any input file $input_file\n" ; | |
170 } | |
171 | |
172 #### ------------------- 02 :: optional work on masses with neutral loss and/or oxidation == modif : ------------------- | |
173 | |
174 # get the oxidation and neutral loss references (names and values) and merge them into @ox_or_loss_names / @ox_or_loss_values : | |
175 my $oparser = lib::parser::new() ; | |
176 if ( ( defined $list_oxidation ) and ( defined $CONF ) ) { ( $ox_names, $ox_values ) = $oparser->get_oxidation_ref( $CONF, $list_oxidation ) ; } | |
177 if ( @{$ox_values} ) { push( @ox_or_loss_values, @{$ox_values} ) ; push( @ox_or_loss_names, @{$ox_names} ) ; } | |
178 if ( ( defined $list_neutral_loss ) and ( defined $CONF ) ) { ( $loss_names, $loss_values ) = $oparser->get_neutral_loss_ref( $CONF, $list_neutral_loss ) ; } | |
179 if ( @{$loss_values} ) { push( @ox_or_loss_values, @{$loss_values} ) ; push( @ox_or_loss_names, @{$loss_names} ) ; } | |
180 | |
181 # prepare a list of masses independently of the presence of modifications (ox/neutral loss); both counters follow the rank of the input mz. | |
182 my $init_mz_index = 0 ; | |
183 my $i = 0 ; | |
184 | |
185 foreach my $init_mz (@{$round_init_mzs}) { | |
186 ## For each rounded input mass : list the masses to query (initial + one per modification), query LIPIDMAPS for each and store the results per input mass | |
187 my @transfo_values_list = () ; | |
188 my @transfo_name_list = () ; | |
189 my $init_annot = 'Init_MZ' ; | |
190 | |
191 push ( @transfo_values_list, \$init_mz ) ; ## the submitted init mass | |
192 ## work on values : each rounded ox/neutral loss value is subtracted from the init mass | |
193 if ( @ox_or_loss_values ) { | |
194 my $oround = lib::operations::new() ; | |
195 my $round_transfo_mzs = $oround->round_nums( \@ox_or_loss_values, $decimal ) ; ## We choose to round the number. | |
196 foreach my $transfo_mz ( @{$round_transfo_mzs} ) { | |
197 my $osub = lib::operations::new() ; | |
198 my $transfo_init_mz = $osub->subtract_num( $init_mz, $transfo_mz ) ; | |
199 push ( @transfo_values_list, $transfo_init_mz ) ; | |
200 } | |
201 } | |
202 | |
203 ## work on annotation for output : only names having an ANNOT_<name> key in the conf are kept | |
204 push ( @transfo_name_list, \$init_annot) ; ## init annot | |
205 if ( @ox_or_loss_names ) { | |
206 foreach my $ox_or_loss_name (@ox_or_loss_names) { | |
207 if ( $TRANSFO{'ANNOT_'.$ox_or_loss_name} ) { | |
208 my $transfo = $TRANSFO{'ANNOT_'.$ox_or_loss_name} ; | |
209 push ( @transfo_name_list, \$transfo ) ; } | |
210 } | |
211 } | |
212 | |
213 ## push final arrays | |
214 push ( @transfo_init_mzs, \@transfo_values_list ) ; | |
215 push ( @transfo_annotations, \@transfo_name_list ) ; | |
216 | |
217 ## foreach transfo mass (round and/or modif) | |
218 | |
219 my ( @queries, @query_results, @query_result_entries, @query_result_entry_nbs, @query_result_clusters ) = ( (), (), (), (), () ) ; | |
220 | |
221 foreach my $transfo_mz ( @{$transfo_init_mzs[$init_mz_index]} ) { | |
222 print "[INFO] Prepare the $i.th query with the mz: $$transfo_mz... \n" if ($verbose == 3); | |
223 ## LM recommendation : If you write a script to automate calls to LMSD, | |
224 # please be kind and do not hit our server more often than once per 20 seconds. | |
225 # We may have to kill scripts that hit our server more frequently. | |
226 Time::HiRes::sleep(0.1); #.1 seconds -- NOTE(review): much shorter than the 20 seconds quoted above, confirm the intended pause | |
227 my ( $cat, $cl, $subcl ) = ( undef, undef, undef ) ; | |
228 # if ( $i >= ( scalar( @transfos_values )-1 ) ) { $i = 0 ; } ## manage the modif for each masses. | |
229 | |
230 ## get the classif level : from the per-row ids when -colclassif is given, else from -cat/-class/-subclass | |
231 if ( defined $classif_ids ) { | |
232 | |
233 if ( $classif_ids->[$i] ) { | |
234 my $olevel = lib::parser::new() ; | |
235 $cat = $olevel->set_category( $classif_ids->[$i] ) ; | |
236 $cl = $olevel->set_class( $classif_ids->[$i] ) ; | |
237 $subcl = $olevel->set_subclass( $classif_ids->[$i] ) ; | |
238 # ( $cat, $cl, $subcl ) = ( $$cat, $$cl, $$subcl ) ; | |
239 } | |
240 else { croak "This information is not available in your parsing ids\n" ; } | |
241 } | |
242 else { | |
243 if ( ( defined $selected_subcl) or ( defined $selected_cl ) or ( defined $selected_cat ) ) { ## an option that is omitted or starts with 'NA' leaves its level unset | |
244 if ( ( defined $selected_cat ) and ( $selected_cat !~ /^NA/ ) ) { ( $cat ) = ( \$selected_cat ) ; } | |
245 if ( ( defined $selected_cl ) and ( $selected_cl !~ /^NA(.*)/ ) ) { ( $cl ) = ( \$selected_cl ) ; } | |
246 if ( ( defined $selected_subcl ) and ( $selected_subcl !~ /^NA(.*)/ ) ) { ( $subcl ) = ( \$selected_subcl ) ; } | |
247 } | |
248 else { croak "No selected category or classification ids list\n" ; } | |
249 } | |
250 | |
251 ## build and get http query : | |
252 my $oquery = lib::lipidmaps::new() ; | |
253 my $ref_http_query = $oquery->build_lm_mass_query( \$CONF->{'SEARCH_URL'}, \$delta, $cat, $cl, $subcl ) ; ## build the query for LM WS, return a list of http, get method | |
254 print "\t[INFO] Exec $$ref_http_query \n" if ($verbose == 3); | |
255 | |
256 ## set entries clusters | |
257 my ( $http_result_mz, $http_query_mz ) = $oquery->get_lm_mass_query($ref_http_query, $transfo_mz) ; ## execute the query, return a list of non-split lm_entries. | |
258 my ( $mz_entries_results, $mz_entries_nb, $mz_clusters_results ) = ( undef, undef, undef ) ; | |
259 if ( (defined $http_result_mz) and ( $$http_result_mz ne '' ) ) { # avoid empty LM results | |
260 ( $mz_entries_results, $mz_entries_nb ) = $oquery->get_lm_entry_object($http_result_mz, $transfo_mz) ; ## get all features of each entry and return a list of features kept in a hash | |
261 $mz_clusters_results = $oquery->get_cluster_object($mz_entries_results, \%RULES, \%RECIPES) ; ## clustering all entries and return a list of clusters kept in a hash | |
262 print "\t[INFO] The query return $$mz_entries_nb entries\n" if ($verbose == 3); | |
263 } | |
264 else { # manage empty LM results : empty lists and a zero count are stored instead | |
265 ( $mz_entries_results, $mz_entries_nb, $mz_clusters_results ) = ( [], \0, [] ) ; | |
266 print "\t[INFO] The query return none entry with LM\n" if ($verbose == 3); | |
267 } | |
268 | |
269 push( @queries, $http_query_mz ) ; | |
270 push( @query_results, $http_result_mz ) ; | |
271 push( @query_result_entries, $mz_entries_results ) ; | |
272 push( @query_result_entry_nbs, $mz_entries_nb ) ; | |
273 push( @query_result_clusters, $mz_clusters_results ) ; | |
274 | |
275 } ## end foreach transfo_mz | |
276 | |
277 $i++ ; # increment the mz rank | |
278 | |
279 push( @transfo_init_mz_queries, \@queries ) ; | |
280 push( @transfo_init_mz_results, \@query_results ) ; | |
281 push( @entries_results, \@query_result_entries ) ; | |
282 push( @entries_total_nb, \@query_result_entry_nbs ) ; | |
283 push( @clusters_results, \@query_result_clusters ) ; | |
284 | |
285 $init_mz_index++ ; | |
286 } ## end foreach init_mz | |
287 | |
288 | |
289 #### -------------------------------- 05 :: Writes LM results -------------------------------------------- | |
290 | |
291 # prepare data and write html output (only when the -view option is given) : | |
292 if ( defined $output_html_file) { | |
293 ## Count the input mz having at least one entry over all their queries ($nb_pages holds that count) | |
294 my ($nb_pages, $total_entries) = (0, 0) ; | |
295 foreach (@entries_total_nb) { | |
296 foreach my $nb ( @{$_} ) { $total_entries += $$nb ; } | |
297 if ($total_entries > 0) { $nb_pages++ ; } | |
298 $total_entries = 0 ; | |
299 } | |
300 ## number of html pages = ceil( mz with records / HTML_ENTRIES_PER_PAGE read from the conf ) | |
301 $nb_pages_for_html_out = ceil( $nb_pages / $CONF->{HTML_ENTRIES_PER_PAGE} ) ; | |
302 print "[INFO] write HTML output file containing $nb_pages_for_html_out pages\n" if ($verbose == 3); | |
303 ## the tbody object is built step by step (mz, transformations, clusters, entries), sorted, then written with the template | |
304 my $ohtml = lib::writer->new() ; | |
305 $tbody_object = $ohtml->set_html_tbody_object( $nb_pages_for_html_out ) ; | |
306 $tbody_object = $ohtml->add_mz_to_tbody_object( $tbody_object, $CONF->{HTML_ENTRIES_PER_PAGE}, $init_mzs, \@entries_total_nb) ; | |
307 $tbody_object = $ohtml->add_transformation_to_tbody_object( \@transfo_init_mzs, \@transfo_annotations, $tbody_object ) ; | |
308 $tbody_object = $ohtml->add_cluster_to_tbody_object( \@transfo_init_mzs, \@clusters_results, $tbody_object ) ; | |
309 $tbody_object = $ohtml->add_entry_to_tbody_object( \@transfo_init_mzs, \@clusters_results, \@entries_results, $tbody_object ) ; | |
310 | |
311 $tbody_object = $ohtml->sort_tbody_object($tbody_object) ; | |
312 | |
313 my $output_html = $ohtml->write_html_skel(\$output_html_file, $tbody_object, $nb_pages_for_html_out, $CONF->{'HTML_TEMPLATE'}, $CONF->{'JS_GALAXY_PATH'}, $CONF->{'CSS_GALAXY_PATH'}) ; | |
314 } | |
315 | |
316 | |
317 # write csv output : add a 'lipidmaps' column to the input file (the column title is set only when the input has a header) | |
318 my $lm_matrix = undef ; | |
319 my $ocsv = lib::writer->new() ; | |
320 if ( defined $is_header ) { $lm_matrix = $ocsv->set_lm_matrix_object('LIPIDMAPS(score::name::mz::formula::adduct::id)', $init_mzs, \@transfo_annotations, \@clusters_results ) ; } | |
321 else { $lm_matrix = $ocsv->set_lm_matrix_object( undef, $init_mzs, \@transfo_annotations, \@clusters_results ) ; } | |
322 | |
323 ## the lipidmaps matrix is merged with the rows parsed from the input file, then written to the -output path | |
324 $lm_matrix = $ocsv->add_lm_matrix_to_input_matrix($init_csv_rows, $lm_matrix) ; | |
325 $ocsv->write_csv_skel(\$output_csv_file, $lm_matrix) ; | |
326 print "[INFO] write CSV output file\n" if ($verbose == 3); | |
327 | |
328 print "-----------**********END of MAIN LIPIDMAPS -- version $version *********-------------\n" if ($verbose == 3); | |
329 | |
330 #print "-----------**********RETURNS*********-------------\n" ; | |
331 #print "\n----- Init Input Data in CSV -----\n" ; | |
332 #print Dumper $init_csv_rows ; | |
333 #print "\n---- Init masses parsed ...\n" ; | |
334 #print Dumper $init_mzs ; | |
335 #print "\n---- Init rts parsed ...\n" ; | |
336 #print Dumper $init_rts ; | |
337 #print "\n---- Init masses arounded ...\n" ; | |
338 #print Dumper $round_init_mzs ; | |
339 #print "\n---- Ox ...\n" ; | |
340 #print Dumper $ox_names ; | |
341 #print Dumper $ox_values ; | |
342 #print "\n---- Neutral loss ...\n" ; | |
343 #print Dumper $loss_names ; | |
344 #print Dumper $loss_values ; | |
345 #print "\n---- Applied transformations ('\@ox_or_loss_values') ...\n" ; | |
346 #print Dumper @ox_or_loss_values ; | |
347 #print "\n---- Masses modif ('\@transfo_init_mzs') ...\n" ; | |
348 #print Dumper @transfo_init_mzs ; | |
349 #print "\n---- Transfo annotation ('\@transfo_annotations') ...\n" ; | |
350 #print Dumper @transfo_annotations ; | |
351 #print "\n---- Queries ('\@transfo_init_mz_queries')...\n" ; | |
352 #print Dumper @transfo_init_mz_queries ; | |
353 #print "\n---- WS Results ('@transfo_init_mz_results')...\n" ; | |
354 #print Dumper @transfo_init_mz_results ; | |
355 #print "\n---- Entries results ('\@entries_results')...\n" ; | |
356 #print Dumper @entries_results ; | |
357 #print "\n---- Entries results numbers ('\@entries_total_nb')...\n" ; | |
358 #print Dumper @entries_total_nb ; | |
359 #print "\n---- Clusters results ('\@clusters_results')...\n" ; | |
360 #print Dumper @clusters_results ; | |
361 #print "\n---- Data model filed...\n" ; | |
362 #print "...with csv->\n" ; | |
363 #print Dumper $lm_matrix ; | |
364 #print "...with html->\n" ; | |
365 #print Dumper $tbody_object ; | |
366 | |
367 | |
368 #==================================================================================== | |
369 # help : prints the usage message (script name, authors, version, options) on STDERR | |
370 # number of arguments : 0 | |
371 # Argument(s) : none | |
372 # Return : the value returned by print (1 on success); the subroutine does not exit by itself | |
373 #==================================================================================== | |
374 sub help { | |
375 print STDERR " | |
376 | |
377 # wsdl_lipidmaps | |
378 # Input : | |
379 # Author : Franck GIACOMONI and Marion LANDI | |
380 # Email : fgiacomoni\@clermont.inra.fr | |
381 # Version : $version | |
382 # Created : 16/07/2012 | |
383 # Updated: 09/06/2016 - REST implem | |
384 USAGE : | |
385 wsdl_lipidmaps.pl -help | |
386 wsdl_lipidmaps.pl | |
387 -input \$file_input -colmass \$col_mass -colrt \$col_rt -decimal \$decimal -round \$round_type -delta \$tolerance | |
388 -output \$output_result -view \$output_view | |
389 -cat -class -subclass OR -colclassif | |
390 -listneutralloss \$neutral_loss -listoxidation \$oxidation [optionnal] | |
391 "; | |
392 } | |
393 | |
394 |