Mercurial > repos > fgiacomoni > bank_inhouse
comparison bank_inhouse.pl @ 1:52798007c6b2 draft default tip
Master branch Updating - - Fxx
author | fgiacomoni |
---|---|
date | Thu, 24 Jan 2019 10:02:05 -0500 |
parents | be582bcd6585 |
children |
comparison
equal
deleted
inserted
replaced
0:be582bcd6585 | 1:52798007c6b2 |
---|---|
81 print "--input $masses_file " if (defined $masses_file) ; | 81 print "--input $masses_file " if (defined $masses_file) ; |
82 print "--rest $rest_mode " if (defined $rest_mode) ; | 82 print "--rest $rest_mode " if (defined $rest_mode) ; |
83 print "--nbheader $nbline_header " if (defined $nbline_header) ; | 83 print "--nbheader $nbline_header " if (defined $nbline_header) ; |
84 print "--colrt $col_rt " if (defined $col_rt) ; | 84 print "--colrt $col_rt " if (defined $col_rt) ; |
85 print "--rt $manual_rt " if (defined $manual_rt) ; | 85 print "--rt $manual_rt " if (defined $manual_rt) ; |
86 print "--rt_delta $rt_delta " if (defined $rt_delta) ; | |
86 print "--colmass $col_mass " if (defined $col_mass) ; | 87 print "--colmass $col_mass " if (defined $col_mass) ; |
87 print "--mz_delta $mz_delta_type " if (defined $mz_delta_type) ; | 88 print "--mz_delta $mz_delta_type " if (defined $mz_delta_type) ; |
88 print "--mass_delta $mz_delta " if (defined $mz_delta) ; | 89 print "--mass_delta $mz_delta " if (defined $mz_delta) ; |
89 print "--mode $mode " if (defined $mode) ; | 90 print "--mode $mode " if (defined $mode) ; |
90 print "--tissues $tissues " if (defined $tissues) ; | 91 print "--tissues $tissues " if (defined $tissues) ; |
143 $complete_rows = $ocsv_input->parse_csv_object($complete_csv, \$masses_file) ; | 144 $complete_rows = $ocsv_input->parse_csv_object($complete_csv, \$masses_file) ; |
144 | 145 |
145 ## parse csv ids and masses | 146 ## parse csv ids and masses |
146 my $ocsv = lib::csv->new() ; | 147 my $ocsv = lib::csv->new() ; |
147 my $csv = $ocsv->get_csv_object( "\t" ) ; | 148 my $csv = $ocsv->get_csv_object( "\t" ) ; |
148 if ( ( defined $nbline_header ) and ( $nbline_header > 0 ) ) { $header_choice = 'yes' ; } else{ $header_choice = 'no' ; } | 149 |
149 $masses = $ocsv->get_value_from_csv_multi_header( $csv, $masses_file, $col_mass, $header_choice, $nbline_header ) ; ## retrieve mz values on csv | 150 if ( ( defined $nbline_header ) and ( $nbline_header > 0 ) ) { $header_choice = 'yes' ; } |
151 else{ $header_choice = 'no' ; } | |
152 | |
153 ## retrieve mz values on csv | |
154 $masses = $ocsv->get_value_from_csv_multi_header( $csv, $masses_file, $col_mass, $header_choice, $nbline_header ) ; | |
155 | |
156 ## retrieve rt values on csv | |
150 if ( ( defined $col_rt ) and ( $col_rt ne "" ) ) { | 157 if ( ( defined $col_rt ) and ( $col_rt ne "" ) ) { |
151 $rt = $ocsv->get_value_from_csv_multi_header( $csv, $masses_file, $col_rt, $header_choice, $nbline_header ) ; ## retrieve rt values on csv | 158 $rt = $ocsv->get_value_from_csv_multi_header( $csv, $masses_file, $col_rt, $header_choice, $nbline_header ) ; ## retrieve rt values on csv |
152 } | 159 } |
153 } | 160 } |
154 | 161 |
173 my ($min, $max) = (0, 0) ; | 180 my ($min, $max) = (0, 0) ; |
174 | 181 |
175 if ( ( defined $masses ) and ( scalar (@{$masses}) > 0 ) ) { | 182 if ( ( defined $masses ) and ( scalar (@{$masses}) > 0 ) ) { |
176 $min = min @{$masses} ; | 183 $min = min @{$masses} ; |
177 $max = max @{$masses} ; | 184 $max = max @{$masses} ; |
178 # print Dumper $masses ; | 185 |
179 ## TODO: adjust min and max with delta | |
180 my ( $min_delta, undef ) = $opfws->mz_delta_conversion(\$min, \$mz_delta_type, \$mz_delta) ; | 186 my ( $min_delta, undef ) = $opfws->mz_delta_conversion(\$min, \$mz_delta_type, \$mz_delta) ; |
181 my ( undef, $max_delta ) = $opfws->mz_delta_conversion(\$max, \$mz_delta_type, \$mz_delta) ; | 187 my ( undef, $max_delta ) = $opfws->mz_delta_conversion(\$max, \$mz_delta_type, \$mz_delta) ; |
182 | 188 |
183 ## get clean range database json from PForest WS : | 189 ## get clean range database json from PForest WS : |
184 my $pf_json = $opfws->db_pforest_get_clean_range($PF_CONF->{PF_WS_URL}, $PF_CONF->{PF_REST_QUERY_CLEAN_RANGE}, $$min_delta, $$max_delta, $mode) ; | 190 my $pf_json = $opfws->db_pforest_get_clean_range($PF_CONF->{PF_WS_URL}, $PF_CONF->{PF_REST_QUERY_CLEAN_RANGE}, $$min_delta, $$max_delta, $mode) ; |
191 } | 197 } |
192 } ## End IF PForest param | 198 } ## End IF PForest param |
193 else { croak "Can't work : missing a PForest REST paramater\n" ; } | 199 else { croak "Can't work : missing a PForest REST paramater\n" ; } |
194 } ## End ELSIF PForest | 200 } ## End ELSIF PForest |
195 elsif ( ( defined $CONF->{'INHOUSE_BANK'} ) and ( $CONF->{'INHOUSE_BANK'} ne '' ) ) { | 201 elsif ( ( defined $CONF->{'INHOUSE_BANK'} ) and ( $CONF->{'INHOUSE_BANK'} ne '' ) ) { |
196 $col_mzdb = 4; | 202 |
197 $bank_name = "internal_bank"; | 203 $col_mzdb = $CONF->{'INHOUSE_BANK_MZ_COLUMN'} ; |
204 $bank_name = $CONF->{'INHOUSE_BANK_NAME'} ; | |
205 my $html_file = $binPath.'/'.$CONF->{'INHOUSE_BANK'} ; | |
206 | |
198 if ( (defined $col_rt) or (defined $manual_rt) or (defined $rt_delta) ) { croak "No retention time in the internal bank, please use your own bank\n" ; } | 207 if ( (defined $col_rt) or (defined $manual_rt) or (defined $rt_delta) ) { croak "No retention time in the internal bank, please use your own bank\n" ; } |
199 my $html_file = $binPath.'/'.$CONF->{'INHOUSE_BANK'} ; | 208 |
200 if ( -e $html_file ) { | 209 if ( -e $html_file ) { |
201 ## parse all csv for later : output csv build | 210 ## parse all csv for later : output csv build |
202 my $ocsv_input = lib::csv->new() ; | 211 my $ocsv_input = lib::csv->new() ; |
203 my $complete_csv = $ocsv_input->get_csv_object( "\t" ) ; | 212 my $complete_csv = $ocsv_input->get_csv_object( "\t" ) ; |
204 ($complete_bank, $bank_head)= $oBih->parse_bank_interest($complete_csv, \$html_file, $col_mzdb-1) ; | 213 ($complete_bank, $bank_head)= $oBih->parse_bank_interest($complete_csv, \$html_file, $col_mzdb-1) ; |
238 my @sort_masses_bank = sort { $a <=> $b } keys(%$bank); | 247 my @sort_masses_bank = sort { $a <=> $b } keys(%$bank); |
239 | 248 |
240 my $compt_masses = 0; | 249 my $compt_masses = 0; |
241 foreach my $mz (@$masses) { | 250 foreach my $mz (@$masses) { |
242 $compt_masses++; | 251 $compt_masses++; |
243 my ($MZmessage) = $oBih->check_interval($mz, 0, 10000) ; | 252 my ($MZmessage) = $oBih->check_interval($mz, $CONF->{'BANK_MZ_MIN'}, $CONF->{'BANK_MZ_MAX'}) ; |
244 if ( $MZmessage eq 'OK' ){ | 253 if ( $MZmessage eq 'OK' ){ |
245 my ( $min, $max ) = $oBih->mz_delta_conversion(\$mz, \$mz_delta_type, \$mz_delta) ; | 254 my ( $min, $max ) = $oBih->mz_delta_conversion(\$mz, \$mz_delta_type, \$mz_delta) ; |
246 | 255 |
247 my ($marj_inf) = $oBih->dichotomi_search(\@sort_masses_bank, $min) ; | 256 my ($marj_inf) = $oBih->dichotomi_search(\@sort_masses_bank, $min) ; |
248 my ($marj_sup) = $oBih->dichotomi_search(\@sort_masses_bank, $max) ; | 257 my ($marj_sup) = $oBih->dichotomi_search(\@sort_masses_bank, $max) ; |
251 if($$marj_inf != $$marj_sup){ | 260 if($$marj_inf != $$marj_sup){ |
252 if ($$marj_inf == -1){ $$marj_inf=0; } | 261 if ($$marj_inf == -1){ $$marj_inf=0; } |
253 for (my $i=$$marj_inf; $i<$$marj_sup; $i++){ | 262 for (my $i=$$marj_inf; $i<$$marj_sup; $i++){ |
254 my $bank_tmp = $$bank{$sort_masses_bank[$i]}; | 263 my $bank_tmp = $$bank{$sort_masses_bank[$i]}; |
255 if ( ( defined $rt ) and ( $rt ne "" ) ) { | 264 if ( ( defined $rt ) and ( $rt ne "" ) ) { |
256 my ($RTmessage) = $oBih->check_interval($$rt[$compt_masses-1], 0, 60) ; | 265 my ($RTmessage) = $oBih->check_interval($$rt[$compt_masses-1], $CONF->{'BANK_RT_MIN'}, $CONF->{'BANK_RT_MAX'}) ; |
257 if ( $RTmessage eq 'OK' ){ | 266 if ( $RTmessage eq 'OK' ){ |
258 my $bank_rt = []; | 267 my $bank_rt = []; |
259 for(my $nb_rt=0; $nb_rt<=$#$bank_tmp; $nb_rt++){ | 268 for(my $nb_rt=0; $nb_rt<=$#$bank_tmp; $nb_rt++){ |
260 my ($RTbank) = $oBih->check_interval($bank_tmp->[$nb_rt]->[$rtdb-1], 0, 60) ; | 269 my ($RTbank) = $oBih->check_interval($bank_tmp->[$nb_rt]->[$rtdb-1], $CONF->{'BANK_RT_MIN'}, $CONF->{'BANK_RT_MAX'}) ; |
261 my ($RTsearch) = $oBih->check_interval($bank_tmp->[$nb_rt]->[$rtdb-1], $$rt[$compt_masses-1]-$rt_delta, $$rt[$compt_masses-1]+$rt_delta) ; | 270 my ($RTsearch) = $oBih->check_interval($bank_tmp->[$nb_rt]->[$rtdb-1], $$rt[$compt_masses-1]-$rt_delta, $$rt[$compt_masses-1]+$rt_delta) ; |
262 if ( ($RTbank eq 'OK') and ($RTsearch eq 'OK') ){ | 271 if ( ($RTbank eq 'OK') and ($RTsearch eq 'OK') ){ |
263 push (@$result, $bank_tmp->[$nb_rt]) ; | 272 push (@$result, $bank_tmp->[$nb_rt]) ; |
264 } | 273 } |
265 elsif ($RTbank ne 'OK'){ croak "At least one retention time in bank is not valid : $RTbank\n" ; } | 274 elsif ($RTbank ne 'OK'){ croak "At least one retention time in bank is not valid : $RTbank\n" ; } |
280 else{ | 289 else{ |
281 my $result = []; | 290 my $result = []; |
282 my $compt_masses = 0; | 291 my $compt_masses = 0; |
283 foreach my $mz (@$masses) { | 292 foreach my $mz (@$masses) { |
284 $compt_masses++; | 293 $compt_masses++; |
285 my ($MZmessage) = $oBih->check_interval($mz, 0, 10000) ; | 294 my ($MZmessage) = $oBih->check_interval($mz, $CONF->{'BANK_MZ_MIN'}, $CONF->{'BANK_MZ_MAX'}) ; |
286 if ( $MZmessage eq 'OK' ){ | 295 if ( $MZmessage eq 'OK' ){ |
287 if ($$bank{$mz}){ | 296 if ($$bank{$mz}){ |
288 my $bank_tmp = $$bank{$mz}; | 297 my $bank_tmp = $$bank{$mz}; |
289 if ( ( defined $rt ) and ( $rt ne "" ) ) { | 298 if ( ( defined $rt ) and ( $rt ne "" ) ) { |
290 my ($RTmessage) = $oBih->check_interval($$rt[$compt_masses-1], 0, 60) ; | 299 my ($RTmessage) = $oBih->check_interval($$rt[$compt_masses-1], $CONF->{'BANK_RT_MIN'}, $CONF->{'BANK_RT_MAX'}) ; |
291 if ( $RTmessage eq 'OK' ){my $bank_rt = []; | 300 if ( $RTmessage eq 'OK' ){my $bank_rt = []; |
292 for(my $nb_rt=0; $nb_rt<=$#$bank_tmp; $nb_rt++){ | 301 for(my $nb_rt=0; $nb_rt<=$#$bank_tmp; $nb_rt++){ |
293 my ($RTbank) = $oBih->check_interval($bank_tmp->[$nb_rt]->[$rtdb-1], 0, 60) ; | 302 my ($RTbank) = $oBih->check_interval($bank_tmp->[$nb_rt]->[$rtdb-1], $CONF->{'BANK_RT_MIN'}, $CONF->{'BANK_RT_MAX'}) ; |
294 my ($RTsearch) = $oBih->check_interval($bank_tmp->[$nb_rt]->[$rtdb-1], $$rt[$compt_masses-1]-$rt_delta, $$rt[$compt_masses-1]+$rt_delta) ; | 303 my ($RTsearch) = $oBih->check_interval($bank_tmp->[$nb_rt]->[$rtdb-1], $$rt[$compt_masses-1]-$rt_delta, $$rt[$compt_masses-1]+$rt_delta) ; |
295 if ( ($RTbank eq 'OK') and ($RTsearch eq 'OK') ){ | 304 if ( ($RTbank eq 'OK') and ($RTsearch eq 'OK') ){ |
296 push (@$bank_rt, $bank_tmp->[$nb_rt]) ; | 305 push (@$bank_rt, $bank_tmp->[$nb_rt]) ; |
297 } | 306 } |
298 elsif ($RTbank ne 'OK'){ croak "At least one retention time in bank is not valid : $RTbank\n" ; } | 307 elsif ($RTbank ne 'OK'){ croak "At least one retention time in bank is not valid : $RTbank\n" ; } |
315 else { | 324 else { |
316 croak "Can't work : missing paramaters (list of ids, masses, delta, ionization, characterization or inhouse_bank)\n" ; | 325 croak "Can't work : missing paramaters (list of ids, masses, delta, ionization, characterization or inhouse_bank)\n" ; |
317 } ## end ELSE | 326 } ## end ELSE |
318 | 327 |
319 | 328 |
320 if ( ( defined $rt ) and ( $rt ne "" ) ) { unshift (@$bank_head, ("DELTA_mass(".$mz_delta.$mz_delta_type.")", "MASS_Result", "DELTA_RT(".$rt_delta.")", "RT_Result")) ; } | 329 if ( ( defined $rt ) and ( $rt ne "" ) ) { unshift (@$bank_head, ("MzDelta_Query-Bank(".$mz_delta.$mz_delta_type.")", "MzBank", "RtQuery", "RtDelta_Query-Bank(".$rt_delta."min.)", "RtBank")) ; } |
321 else { unshift (@$bank_head, ("DELTA_mass(".$mz_delta.$mz_delta_type.")", "MASS_Result")) ; } | 330 else { unshift (@$bank_head, ("MzDelta_Query-Bank(".$mz_delta.$mz_delta_type.")", "MzBank")) ; } |
322 | 331 |
323 | 332 |
324 ## -------------- Produce JSON output ------------------ : | 333 ## -------------- Produce JSON output ------------------ : |
325 if ( ( defined $out_json ) and ( defined $results ) ) { | 334 if ( ( defined $out_json ) and ( defined $results ) ) { |
326 open(JSON, '>:utf8', "$out_json") or die "Cant' create the json file\n" ; | 335 open(JSON, '>:utf8', "$out_json") or die "Cant' create the json file\n" ; |
363 my $ocsv = lib::bih::new() ; | 372 my $ocsv = lib::bih::new() ; |
364 my $lm_matrix = undef ; | 373 my $lm_matrix = undef ; |
365 if ( ( $header_choice eq 'yes' ) and ( defined $nbline_header ) and ( $nbline_header > 0 )) { | 374 if ( ( $header_choice eq 'yes' ) and ( defined $nbline_header ) and ( $nbline_header > 0 )) { |
366 my $header = join("$sep", @$bank_head); | 375 my $header = join("$sep", @$bank_head); |
367 my $out_head = "BiH_".$bank_name."_(".$header.")"; | 376 my $out_head = "BiH_".$bank_name."_(".$header.")"; |
368 $lm_matrix = $ocsv->set_lm_matrix_object($out_head, $masses, $col_mzdb, $results, $rt, $rtdb, $bank_head, $sep ) ; | 377 $lm_matrix = $ocsv->set_bih_matrix_object($out_head, $masses, $col_mzdb, $results, $rt, $rtdb, $bank_head, $sep ) ; |
369 $lm_matrix = $ocsv->add_lm_matrix_to_input_matrix($complete_rows, $lm_matrix, $nbline_header-1) ; | 378 $lm_matrix = $ocsv->add_bih_matrix_to_input_matrix($complete_rows, $lm_matrix, $nbline_header-1) ; |
370 } | 379 } |
371 elsif ( ( $header_choice eq 'no' ) or ( $nbline_header <= 0 )) { | 380 elsif ( ( $header_choice eq 'no' ) or ( $nbline_header <= 0 )) { |
372 $lm_matrix = $ocsv->set_lm_matrix_object(undef, $masses, $col_mzdb, $results, $rt, $rtdb, $bank_head, $sep ) ; | 381 $lm_matrix = $ocsv->set_bih_matrix_object(undef, $masses, $col_mzdb, $results, $rt, $rtdb, $bank_head, $sep ) ; |
373 $lm_matrix = $ocsv->add_lm_matrix_to_input_matrix($complete_rows, $lm_matrix, 0) ; | 382 $lm_matrix = $ocsv->add_bih_matrix_to_input_matrix($complete_rows, $lm_matrix, 0) ; |
374 } | 383 } |
375 else { croak "The number of header line is not identifiable\n" ; } | 384 else { croak "The number of header line is not identifiable\n" ; } |
376 $ocsv->write_csv_skel(\$out_tab, $lm_matrix) ; | 385 $ocsv->write_csv_skel(\$out_tab, $lm_matrix) ; |
377 } | 386 } |
378 else { croak "Can't create a tabular output for BiH : your output file is not defined\n" ; } | 387 else { croak "Can't create a tabular output for BiH : your output file is not defined\n" ; } |
413 print STDERR " | 422 print STDERR " |
414 bank_inhouse | 423 bank_inhouse |
415 | 424 |
416 # bank_inhouse is a script to query a in house bank (file) using chemical mass and return a list of common names. | 425 # bank_inhouse is a script to query a in house bank (file) using chemical mass and return a list of common names. |
417 # Input : mass or list of masses | 426 # Input : mass or list of masses |
418 # Author : Marion LANDI and Franck Giacomoni (for the Rest part) | 427 # Author : Marion LANDI and Franck Giacomoni (as maintainers) |
419 # Email : franck.giacomoni\@clermont.inra.fr | 428 # Email : franck.giacomoni\@inra.fr |
420 # Version : 1.1 | 429 # Version : 1.2.1 |
421 # Created : 15/10/2014 | 430 # Created : 15/10/2014 |
422 # Updated : 15/12/2015 | 431 # Updated : 24/01/2019 |
423 USAGE : | 432 USAGE : |
424 bank_inhouse.pl -masse [mass] | 433 bank_inhouse.pl -masse [mass] |
425 -tolerance [Delta of mass (Da)] -mode [Ionization type: positive/negative/neutral] | 434 -tolerance [Delta of mass (Da)] -mode [Ionization type: positive/negative/neutral] |
426 -tissues [Restricted to certain tissues] -- optionnal | 435 -tissues [Restricted to certain tissues] -- optionnal |
427 -bank_in [file for in personal house bank] -- optionnal | 436 -bank_in [file for in personal house bank] -- optionnal |
477 =back | 486 =back |
478 | 487 |
479 =head1 AUTHOR | 488 =head1 AUTHOR |
480 | 489 |
481 Marion LANDI E<lt>marion.landi@clermont.inra.frE<gt> | 490 Marion LANDI E<lt>marion.landi@clermont.inra.frE<gt> |
491 Frank Giacomoni E<lt>franck.giacomoni@inra.frE<gt> | |
482 | 492 |
483 =head1 LICENSE | 493 =head1 LICENSE |
484 | 494 |
485 This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. | 495 This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. |
486 | 496 |
487 =head1 VERSION | 497 =head1 VERSION |
488 | 498 |
489 version 1 : 15 / 10 / 2014 | 499 version 1.1.1 : 15 / 10 / 2014 |
490 | 500 |
491 version 2 : 21 / 11 / 2014 | 501 version 1.1.2 : 21 / 11 / 2014 |
502 | |
503 version 1.1.3 : 13 / 11 / 2018 | |
504 | |
505 version 1.2.0 : 12 / 12 / 2018 | |
506 | |
507 version 1.2.1 : 24 / 01 / 2019 - new version with no more rest methods | |
508 | |
492 | 509 |
493 =cut | 510 =cut |