comparison wsdl_lipidmaps.pl @ 0:e8bd49794291 draft

Init repository with last lipidmaps_textsearch master version
author fgiacomoni
date Tue, 11 Apr 2017 03:47:06 -0400
parents
children 1276908e8fc4
comparison
equal deleted inserted replaced
-1:000000000000 0:e8bd49794291
1 #! perl
2
3 use strict ;
4 no strict "refs" ;
5 use warnings ;
6 use Carp qw (cluck croak carp confess) ;
7 use Exporter ;
8 use diagnostics ;
9
10 use Data::Dumper ;
11 use POSIX ;
12 use XML::Twig;
13 use Getopt::Long ;
14 use Time::HiRes;
15
16
17 ## Locate the directory of the original perl script
18 use FindBin;
19 ## Used to define the location of the .pm and .conf files
20 use lib $FindBin::Bin ;
21 #my $libPath = $FindBin::Bin."/lib";
22 my $binPath = $FindBin::Bin ;
23
24 ## dedicated lib
25
26 use lib::lipidmaps ;
27 use lib::parser ;
28 use lib::writer ;
29 # more inra lib
30 use lib::conf qw( :ALL ) ;
31 use lib::csv qw( :ALL ) ;
32 use lib::operations qw( :ALL ) ;
33
34 ## Initialized values : command line parameters, all left undefined until GetOptions fills them
35 #
36 my $version = '1.2';
37 my ( $help, $input_file, $line_header, $col_mass, $col_rt, $decimal, $round_type, $delta, $mode ) = ( undef, undef, undef, undef, undef, undef, undef, undef, undef ) ;
38 my ( $list_oxidation, $list_neutral_loss ) = ( undef, undef ) ;
39 my ( $col_classif_id, $selected_cat, $selected_cl, $selected_subcl ) = ( undef, undef, undef, undef ) ;
40 my ( $output_csv_file, $output_html_file ) = ( undef, undef ) ;
41
42 ## Verbose levels (1 OR 3)
43 my $verbose = 3 ;
44
45
46 GetOptions ( "help|h" => \$help, # HELP
47 "input|i:s" => \$input_file, # path for input file (CSV format) -- Mandatory
48 "lineheader:i" => \$line_header, ## header presence in tabular file
49 "colmass:i" => \$col_mass, # Input file Column containing Masses for query -- Mandatory
50 # "colrt:i" => \$col_rt, # Input file Column containing Retention time (option currently disabled)
51 "decimal:i" => \$decimal , # Significant decimals on mass -- Mandatory
52 "listoxidation:s" => \$list_oxidation, ## option : list of oxidations to take into account on the experimental masses
53 "listneutralloss:s" => \$list_neutral_loss, ## option : list of neutral losses to take into account on the experimental masses
54 "round:s" => \$round_type, # Type of truncation -- Mandatory
55 "delta:f" => \$delta, # delta of mass -- Mandatory
56 "cat:s" => \$selected_cat, # Number corresponding to the main category in LIPIDMAPS -- Optional
57 "class:s" => \$selected_cl, # Number corresponding to the main classe in LIPIDMAPS -- Optional
58 "subclass:s" => \$selected_subcl, # Number corresponding to the sub class in LIPIDMAPS -- Optional
59 "output:s" => \$output_csv_file, # File+Path for the results (CSV) -- Mandatory
60 "view:s" => \$output_html_file, # File+Path for the view results (HTML) -- Mandatory
61 "colclassif:i" => \$col_classif_id, # Input file Column containing LM classes ID for query -- Optional
62 "mode:s" => \$mode, # mode of the initial data
63 ) ;
64
65 #=============================================================================
66 # EXCEPTIONS
67 #=============================================================================
68 if ( $help ) { help() ; exit 0 ; } ## print the usage then stop : going on after -help would only fail on the missing parameters
69
70 ## --------------- Global parameters ---------------- :
71 my $nb_pages_for_html_out = 1 ; ## default number of pages for the HTML output
72
73 ## Conf file : every *.conf found beside the script is parsed, the last one parsed is the one kept in $CONF
74 my $CONF = undef ;
75 my ( %RULES, %RECIPES, %TRANSFO ) ;
76 for my $conf_file ( glob("$binPath/*.conf") ) {
77 $CONF = lib::conf::new()->as_conf($conf_file) ;
78 }
79 ## -------------- HTML template file : the last *.tmpl found beside the script is recorded in the conf
80 for my $tmpl_file ( glob("$binPath/*.tmpl") ) {
81 $CONF->{'HTML_TEMPLATE'} = $tmpl_file ;
82 }
83 ## Dispatch the conf entries by the prefix of their key :
84 for my $conf_key ( keys %{$CONF} ) {
85 my $conf_value = $CONF->{$conf_key} ;
86 if ( $conf_key =~/^RULE/ ) { $RULES{$conf_key} = $conf_value ; } ## rules for clustering
87 elsif ( $conf_key =~/^RECIPE/ ) { $RECIPES{$conf_key} = $conf_value ; } ## fields retrieved with each rule
88 elsif ( $conf_key =~/^ANNOT/ ) { $TRANSFO{$conf_key} = $conf_value ; } ## Transformation annotation in output files
89 }
90
91 ## Working variables shared by all the following steps (scalars start undefined, arrays start empty)
92 my ( $init_csv_rows, $init_mzs, $init_rts, $classif_ids, $round_init_mzs ) ;
93 my ( @ox_or_loss_names, @ox_or_loss_values, @transfo_init_mzs, @transfo_annotations ) ;
94 my ( @transfo_init_mz_queries, @transfo_init_mz_results, @entries_results, @clusters_results, @entries_total_nb ) ;
95 my ( $ox_names, $ox_values, $loss_names, $loss_values ) = ( [], [], [], [] ) ;
96 my ( $is_header, $tbody_object ) ;
97
98 if ( $verbose == 3 ) { print "-----------**********START of MAIN LIPIDMAPS -- version $version *********-------------\n" ; }
99
100 #### --------------------------------- 01 :: Prepare all and Parsing steps on inputs -------------------------------------
101
102 ## Open CSV FILE / Extract and transform Masses
103 if ( ( defined $input_file ) and ( -e $input_file ) ) {
104 print "\n[INFO] Open input file and get values...\n" if ($verbose == 3);
105 ## parse all csv for later : output csv build
106 my $ocsv_input = lib::csv->new() ;
107 my $csv = $ocsv_input->get_csv_object( "\t" ) ;
108 $init_csv_rows = $ocsv_input->parse_csv_object($csv, \$input_file) ;
109
110 ## a strictly positive -lineheader value flags the input file as having header line(s)
111 if ( ( defined $line_header ) and ( $line_header > 0 ) ) { $is_header = 'yes' ; }
112
113 ## parse masses
114 if ( defined $col_mass ) {
115 print "[INFO] Get masses from input file $input_file ...\n" if ($verbose == 3);
116 # print "[INFO] Get RT from input file $input_file ...\n" if ($verbose == 3);
117 my $ocsv = lib::csv->new() ;
118 my $csv = $ocsv->get_csv_object( "\t" ) ;
119 $init_mzs = $ocsv->get_value_from_csv_multi_header( $csv, $input_file, $col_mass, $is_header, $line_header ) ; ## retrieve mz values on csv
120 # $init_rts = $ocsv->get_value_from_csv_multi_header( $csv, $input_file, $col_rt, $is_header, $line_header ) ; ## retrieve rt values on csv
121 }
122
123 ## Adjust the mz to the instrument mode (POS/NEG) -- NOTE(review): the constants given to manage_mode look like a charge, the electron mass and the hydrogen mass; confirm in lib::operations
124 if ( ( defined $mode ) and ( ($mode eq 'POS') or ($mode eq 'NEG') ) ) {
125 print "\t [INFO] Apply mass mode transforming (POS to NEU or NEG to NEU) ...\n" if ($verbose == 3);
126 my @mode_init_mzs = () ;
127 my $omode = lib::operations::new() ;
128 foreach my $mz (@$init_mzs) {
129 push (@mode_init_mzs, ${$omode->manage_mode(\$mode, \1, \0.0005486, \1.007825, \$mz)} ) ;
130 }
131 ## the adjusted list replaces the initial one only when it holds as many values
132 if ( (scalar @$init_mzs) == (scalar @mode_init_mzs) ) {
133 $init_mzs = \@mode_init_mzs ;
134 }
135 else {
136 carp "[ERROR] The mode managing process failed and init mzs have been corrompted\n"
137 }
138 }
139 else {
140 print "\t [INFO] Apply no mass mode transforming\n" if ($verbose == 3);
141 }
142
143 ## round masses : -round accepts 'truncation' or 'round', any other value is fatal
144 if ( ( defined $round_type ) and ( defined $decimal ) ) {
145 print "\t [INFO] Apply mass rounding ...\n" if ($verbose == 3);
146 my $oround = lib::operations::new() ;
147 if ( $round_type eq 'truncation' ) { $round_init_mzs = $oround->truncate_nums( $init_mzs, $decimal ) ; }
148 elsif ( $round_type eq 'round' ) { $round_init_mzs = $oround->round_nums( $init_mzs, $decimal ) ; }
149 else { croak "The selected option for data transformation is unknown !\n" ; }
150 }
151 ## parse classif ids -- optionnal
152 if ( defined $col_classif_id ) {
153 print "\t [INFO] Get LM classification IDS from input file $input_file ...\n" if ($verbose == 3);
154 my $ocsv = lib::csv::new() ;
155 my $csv = $ocsv->get_csv_object( "\t" ) ;
156 $classif_ids = $ocsv->get_value_from_csv( $csv, $input_file, $col_classif_id, $is_header, $line_header ) ;
157 }
158
159 ## Uses N mz and theirs entries per page (see config file).
160 # how many pages you need with your input mz list?
161 # $nb_pages_for_html_out = ceil( scalar(@{$init_mzs} ) / $CONF->{HTML_ENTRIES_PER_PAGE} ) ;
162 # print "[INFO] Your analysis will generate $nb_pages_for_html_out pages of results...\n" if ($verbose == 3);
163
164 }
165 else {
166 ## $input_file is undefined when -input was not given : use a printable label instead of interpolating undef (uninitialized value warnings)
167 my $input_label = ( defined $input_file ) ? $input_file : '(no -input option given)' ;
168 print "[ERROR] Can't find any input file $input_label\n" if ($verbose == 3);
169 croak "Can't find any input file $input_label\n" ;
170 }
171
172 #### ------------------- 02 :: optionnal work on masses with neutral loss and/or oxydation == modif : -------------------
173
174 # get and merge ox and neutral loss envt :
175 my $oparser = lib::parser::new() ;
176 if ( ( defined $list_oxidation ) and ( defined $CONF ) ) { ( $ox_names, $ox_values ) = $oparser->get_oxidation_ref( $CONF, $list_oxidation ) ; }
177 if ( @{$ox_values} ) { push( @ox_or_loss_values, @{$ox_values} ) ; push( @ox_or_loss_names, @{$ox_names} ) ; }
178 if ( ( defined $list_neutral_loss ) and ( defined $CONF ) ) { ( $loss_names, $loss_values ) = $oparser->get_neutral_loss_ref( $CONF, $list_neutral_loss ) ; }
179 if ( @{$loss_values} ) { push( @ox_or_loss_values, @{$loss_values} ) ; push( @ox_or_loss_names, @{$loss_names} ) ; }
180
181 # prepare a list of masses indpt of modif (ox/neutral loss) presence.
182 my $init_mz_index = 0 ;
183 my $i = 0 ;
184
185 foreach my $init_mz (@{$round_init_mzs}) {
186 ## one pass per rounded input mass : build its transformed masses and annotations, then query LIPIDMAPS for each mass
187 my @transfo_values_list = () ;
188 my @transfo_name_list = () ;
189 my $init_annot = 'Init_MZ' ;
190
191 push ( @transfo_values_list, \$init_mz ) ; ## the submitted init mass
192 ## work on values
193 if ( @ox_or_loss_values ) {
194 my $oround = lib::operations::new() ;
195 my $round_transfo_mzs = $oround->round_nums( \@ox_or_loss_values, $decimal ) ; ## We choose to around the number.
196 foreach my $transfo_mz ( @{$round_transfo_mzs} ) {
197 my $osub = lib::operations::new() ;
198 my $transfo_init_mz = $osub->subtract_num( $init_mz, $transfo_mz ) ;
199 push ( @transfo_values_list, $transfo_init_mz ) ;
200 }
201 }
202
203 ## work on annotation for output
204 push ( @transfo_name_list, \$init_annot) ; ## init annot
205 if ( @ox_or_loss_names ) {
206 foreach my $ox_or_loss_name (@ox_or_loss_names) {
207 if ( $TRANSFO{'ANNOT_'.$ox_or_loss_name} ) {
208 my $transfo = $TRANSFO{'ANNOT_'.$ox_or_loss_name} ;
209 push ( @transfo_name_list, \$transfo ) ; }
210 }
211 }
212
213 ## push final arrays
214 push ( @transfo_init_mzs, \@transfo_values_list ) ;
215 push ( @transfo_annotations, \@transfo_name_list ) ;
216
217 ## foreach transfo mass (round and/or modif)
218
219 my ( @queries, @query_results, @query_result_entries, @query_result_entry_nbs, @query_result_clusters ) = ( (), (), (), (), () ) ;
220
221 foreach my $transfo_mz ( @{$transfo_init_mzs[$init_mz_index]} ) {
222 print "[INFO] Prepare the $i.th query with the mz: $$transfo_mz... \n" if ($verbose == 3);
223 ## LM recommandation : If you write a script to automate calls to LMSD,
224 # please be kind and do not hit our server more often than once per 20 seconds.
225 # We may have to kill scripts that hit our server more frequently.
226 Time::HiRes::sleep(0.1); #.1 seconds -- NOTE(review): far shorter than the 20 seconds quoted above, confirm it is acceptable for the server
227 my ( $cat, $cl, $subcl ) = ( undef, undef, undef ) ;
228 # if ( $i >= ( scalar( @transfos_values )-1 ) ) { $i = 0 ; } ## manage the modif for each masses.
229
230 ## get the classif level :
231 if ( defined $classif_ids ) {
232
233 if ( $classif_ids->[$i] ) {
234 my $olevel = lib::parser::new() ;
235 $cat = $olevel->set_category( $classif_ids->[$i] ) ;
236 $cl = $olevel->set_class( $classif_ids->[$i] ) ;
237 $subcl = $olevel->set_subclass( $classif_ids->[$i] ) ;
238 # ( $cat, $cl, $subcl ) = ( $$cat, $$cl, $$subcl ) ;
239 }
240 else { croak "This information is not available in your parsing ids\n" ; }
241 }
242 else { ## -cat / -class / -subclass : a level is kept only when it was given and does not start with NA
243 if ( ( defined $selected_subcl) or ( defined $selected_cl ) or ( defined $selected_cat ) ) {
244 if ( ( defined $selected_cat ) and ( $selected_cat !~ /^NA/ ) ) { ( $cat ) = ( \$selected_cat ) ; }
245 if ( ( defined $selected_cl ) and ( $selected_cl !~ /^NA(.*)/ ) ) { ( $cl ) = ( \$selected_cl ) ; }
246 if ( ( defined $selected_subcl ) and ( $selected_subcl !~ /^NA(.*)/ ) ) { ( $subcl ) = ( \$selected_subcl ) ; }
247 }
248 else { croak "No selected category or classification ids list\n" ; }
249 }
250
251 ## buid and get http query :
252 my $oquery = lib::lipidmaps::new() ;
253 my $ref_http_query = $oquery->build_lm_mass_query( \$CONF->{'SEARCH_URL'}, \$delta, $cat, $cl, $subcl ) ; ## build the query for LM WS, return a list of http, get method
254 print "\t[INFO] Exec $$ref_http_query \n" if ($verbose == 3);
255
256 ## set entries clusters
257 my ( $http_result_mz, $http_query_mz ) = $oquery->get_lm_mass_query($ref_http_query, $transfo_mz) ; ## execute the query, return a list of non-splited lm_entries.
258 my ( $mz_entries_results, $mz_entries_nb, $mz_clusters_results ) = ( undef, undef, undef ) ;
259 if ( (defined $http_result_mz) and ( $$http_result_mz ne '' ) ) { # avoid empty LM results
260 ( $mz_entries_results, $mz_entries_nb ) = $oquery->get_lm_entry_object($http_result_mz, $transfo_mz) ; ## get all features of each entry and return a list of features keept in a hash
261 $mz_clusters_results = $oquery->get_cluster_object($mz_entries_results, \%RULES, \%RECIPES) ; ## clustering all entries and return a list of clusters keept in a hash
262 print "\t[INFO] The query return $$mz_entries_nb entries\n" if ($verbose == 3);
263 }
264 else { # manage empty LM results
265 ( $mz_entries_results, $mz_entries_nb, $mz_clusters_results ) = ( [], \0, [] ) ;
266 print "\t[INFO] The query return none entry with LM\n" if ($verbose == 3);
267 }
268
269 push( @queries, $http_query_mz ) ;
270 push( @query_results, $http_result_mz ) ;
271 push( @query_result_entries, $mz_entries_results ) ;
272 push( @query_result_entry_nbs, $mz_entries_nb ) ;
273 push( @query_result_clusters, $mz_clusters_results ) ;
274
275 } ## end foreach transfo_mz
276
277 $i++ ; # implem the mz rank
278
279 push( @transfo_init_mz_queries, \@queries ) ;
280 push( @transfo_init_mz_results, \@query_results ) ;
281 push( @entries_results, \@query_result_entries ) ;
282 push( @entries_total_nb, \@query_result_entry_nbs ) ;
283 push( @clusters_results, \@query_result_clusters ) ;
284
285 $init_mz_index++ ;
286 } ## end foreach init_mz
287
288
289 #### -------------------------------- 05 :: Writes LM results --------------------------------------------
290
291 ## HTML view : built only when an output path was given with -view
292 if ( defined $output_html_file ) {
293 ## count the mz having at least one entry over all their queries
294 my $nb_pages = 0 ;
295 foreach my $entry_nbs_of_mz ( @entries_total_nb ) {
296 my $total_entries = 0 ;
297 $total_entries += $$_ foreach ( @{$entry_nbs_of_mz} ) ;
298 $nb_pages++ if ( $total_entries > 0 ) ;
299 }
300 ## and spread them over pages of HTML_ENTRIES_PER_PAGE mz (conf value)
301 $nb_pages_for_html_out = ceil( $nb_pages / $CONF->{HTML_ENTRIES_PER_PAGE} ) ;
302 if ( $verbose == 3 ) { print "[INFO] write HTML output file containing $nb_pages_for_html_out pages\n" ; }
303
304 ## fill the tbody object step by step, sort it, then write the page
305 my $ohtml = lib::writer->new() ;
306 $tbody_object = $ohtml->set_html_tbody_object( $nb_pages_for_html_out ) ;
307 $tbody_object = $ohtml->add_mz_to_tbody_object( $tbody_object, $CONF->{HTML_ENTRIES_PER_PAGE}, $init_mzs, \@entries_total_nb) ;
308 $tbody_object = $ohtml->add_transformation_to_tbody_object( \@transfo_init_mzs, \@transfo_annotations, $tbody_object ) ;
309 $tbody_object = $ohtml->add_cluster_to_tbody_object( \@transfo_init_mzs, \@clusters_results, $tbody_object ) ;
310 $tbody_object = $ohtml->add_entry_to_tbody_object( \@transfo_init_mzs, \@clusters_results, \@entries_results, $tbody_object ) ;
311 $tbody_object = $ohtml->sort_tbody_object($tbody_object) ;
312
313 my $output_html = $ohtml->write_html_skel(\$output_html_file, $tbody_object, $nb_pages_for_html_out, $CONF->{'HTML_TEMPLATE'}, $CONF->{'JS_GALAXY_PATH'}, $CONF->{'CSS_GALAXY_PATH'}) ;
314 }
315
316 ## CSV output : add a 'lipidmaps' column to the input file
317 my $lm_matrix = undef ;
318 my $ocsv = lib::writer->new() ;
319 ## the column title is only given when the input file was flagged as having header line(s)
320 my $lm_header = ( defined $is_header ) ? 'LIPIDMAPS(score::name::mz::formula::adduct::id)' : undef ;
321 $lm_matrix = $ocsv->set_lm_matrix_object( $lm_header, $init_mzs, \@transfo_annotations, \@clusters_results ) ;
322
323
324 $lm_matrix = $ocsv->add_lm_matrix_to_input_matrix($init_csv_rows, $lm_matrix) ;
325 $ocsv->write_csv_skel(\$output_csv_file, $lm_matrix) ;
326 if ( $verbose == 3 ) { print "[INFO] write CSV output file\n" ; }
327
328 if ( $verbose == 3 ) { print "-----------**********END of MAIN LIPIDMAPS -- version $version *********-------------\n" ; }
329
330 #print "-----------**********RETURNS*********-------------\n" ;
331 #print "\n----- Init Input Data in CSV -----\n" ;
332 #print Dumper $init_csv_rows ;
333 #print "\n---- Init masses parsed ...\n" ;
334 #print Dumper $init_mzs ;
335 #print "\n---- Init rts parsed ...\n" ;
336 #print Dumper $init_rts ;
337 #print "\n---- Init masses arounded ...\n" ;
338 #print Dumper $round_init_mzs ;
339 #print "\n---- Ox ...\n" ;
340 #print Dumper $ox_names ;
341 #print Dumper $ox_values ;
342 #print "\n---- Neutral loss ...\n" ;
343 #print Dumper $loss_names ;
344 #print Dumper $loss_values ;
345 #print "\n---- Applied transformations ('\@ox_or_loss_values') ...\n" ;
346 #print Dumper @ox_or_loss_values ;
347 #print "\n---- Masses modif ('\@transfo_init_mzs') ...\n" ;
348 #print Dumper @transfo_init_mzs ;
349 #print "\n---- Transfo annotation ('\@transfo_annotations') ...\n" ;
350 #print Dumper @transfo_annotations ;
351 #print "\n---- Queries ('\@transfo_init_mz_queries')...\n" ;
352 #print Dumper @transfo_init_mz_queries ;
353 #print "\n---- WS Results ('\@transfo_init_mz_results')...\n" ;
354 #print Dumper @transfo_init_mz_results ;
355 #print "\n---- Entries results ('\@entries_results')...\n" ;
356 #print Dumper @entries_results ;
357 #print "\n---- Entries results numbers ('\@entries_total_nb')...\n" ;
358 #print Dumper @entries_total_nb ;
359 #print "\n---- Clusters results ('\@clusters_results')...\n" ;
360 #print Dumper @clusters_results ;
361 #print "\n---- Data model filed...\n" ;
362 #print "...with csv->\n" ;
363 #print Dumper $lm_matrix ;
364 #print "...with html->\n" ;
365 #print Dumper $tbody_object ;
366
367
368 #====================================================================================
369 # Help subroutine called with -h option : prints the usage message on STDERR, does not exit by itself
370 # number of arguments : 0
371 # Argument(s) :
372 # Return : 1 (value returned by print when the message is written)
373 #====================================================================================
374 sub help {
375 print STDERR "
376
377 # wsdl_lipidmaps
378 # Input :
379 # Author : Franck GIACOMONI and Marion LANDI
380 # Email : fgiacomoni\@clermont.inra.fr
381 # Version : $version
382 # Created : 16/07/2012
383 # Updated: 09/06/2016 - REST implem
384 USAGE :
385 wsdl_lipidmaps.pl -help
386 wsdl_lipidmaps.pl
387 -input \$file_input -colmass \$col_mass -decimal \$decimal -round \$round_type -delta \$tolerance
388 -output \$output_result -view \$output_view
389 -cat -class -subclass OR -colclassif
390 -listneutralloss \$neutral_loss -listoxidation \$oxidation -lineheader \$line_header -mode \$mode [optionnal]
391 ";
392 }
393
394