comparison bank_inhouse.pl @ 1:52798007c6b2 draft default tip

Master branch Updating - - Fxx
author fgiacomoni
date Thu, 24 Jan 2019 10:02:05 -0500
parents be582bcd6585
children
comparison
equal deleted inserted replaced
0:be582bcd6585 1:52798007c6b2
81 print "--input $masses_file " if (defined $masses_file) ; 81 print "--input $masses_file " if (defined $masses_file) ;
82 print "--rest $rest_mode " if (defined $rest_mode) ; 82 print "--rest $rest_mode " if (defined $rest_mode) ;
83 print "--nbheader $nbline_header " if (defined $nbline_header) ; 83 print "--nbheader $nbline_header " if (defined $nbline_header) ;
84 print "--colrt $col_rt " if (defined $col_rt) ; 84 print "--colrt $col_rt " if (defined $col_rt) ;
85 print "--rt $manual_rt " if (defined $manual_rt) ; 85 print "--rt $manual_rt " if (defined $manual_rt) ;
86 print "--rt_delta $rt_delta " if (defined $rt_delta) ;
86 print "--colmass $col_mass " if (defined $col_mass) ; 87 print "--colmass $col_mass " if (defined $col_mass) ;
87 print "--mz_delta $mz_delta_type " if (defined $mz_delta_type) ; 88 print "--mz_delta $mz_delta_type " if (defined $mz_delta_type) ;
88 print "--mass_delta $mz_delta " if (defined $mz_delta) ; 89 print "--mass_delta $mz_delta " if (defined $mz_delta) ;
89 print "--mode $mode " if (defined $mode) ; 90 print "--mode $mode " if (defined $mode) ;
90 print "--tissues $tissues " if (defined $tissues) ; 91 print "--tissues $tissues " if (defined $tissues) ;
143 $complete_rows = $ocsv_input->parse_csv_object($complete_csv, \$masses_file) ; 144 $complete_rows = $ocsv_input->parse_csv_object($complete_csv, \$masses_file) ;
144 145
145 ## parse csv ids and masses 146 ## parse csv ids and masses
146 my $ocsv = lib::csv->new() ; 147 my $ocsv = lib::csv->new() ;
147 my $csv = $ocsv->get_csv_object( "\t" ) ; 148 my $csv = $ocsv->get_csv_object( "\t" ) ;
148 if ( ( defined $nbline_header ) and ( $nbline_header > 0 ) ) { $header_choice = 'yes' ; } else{ $header_choice = 'no' ; } 149
149 $masses = $ocsv->get_value_from_csv_multi_header( $csv, $masses_file, $col_mass, $header_choice, $nbline_header ) ; ## retrieve mz values on csv 150 if ( ( defined $nbline_header ) and ( $nbline_header > 0 ) ) { $header_choice = 'yes' ; }
151 else{ $header_choice = 'no' ; }
152
153 ## retrieve mz values on csv
154 $masses = $ocsv->get_value_from_csv_multi_header( $csv, $masses_file, $col_mass, $header_choice, $nbline_header ) ;
155
156 ## retrieve rt values on csv
150 if ( ( defined $col_rt ) and ( $col_rt ne "" ) ) { 157 if ( ( defined $col_rt ) and ( $col_rt ne "" ) ) {
151 $rt = $ocsv->get_value_from_csv_multi_header( $csv, $masses_file, $col_rt, $header_choice, $nbline_header ) ; ## retrieve rt values on csv 158 $rt = $ocsv->get_value_from_csv_multi_header( $csv, $masses_file, $col_rt, $header_choice, $nbline_header ) ; ## retrieve rt values on csv
152 } 159 }
153 } 160 }
154 161
173 my ($min, $max) = (0, 0) ; 180 my ($min, $max) = (0, 0) ;
174 181
175 if ( ( defined $masses ) and ( scalar (@{$masses}) > 0 ) ) { 182 if ( ( defined $masses ) and ( scalar (@{$masses}) > 0 ) ) {
176 $min = min @{$masses} ; 183 $min = min @{$masses} ;
177 $max = max @{$masses} ; 184 $max = max @{$masses} ;
178 # print Dumper $masses ; 185
179 ## TODO: adjust min and max with delta
180 my ( $min_delta, undef ) = $opfws->mz_delta_conversion(\$min, \$mz_delta_type, \$mz_delta) ; 186 my ( $min_delta, undef ) = $opfws->mz_delta_conversion(\$min, \$mz_delta_type, \$mz_delta) ;
181 my ( undef, $max_delta ) = $opfws->mz_delta_conversion(\$max, \$mz_delta_type, \$mz_delta) ; 187 my ( undef, $max_delta ) = $opfws->mz_delta_conversion(\$max, \$mz_delta_type, \$mz_delta) ;
182 188
183 ## get clean range database json from PForest WS : 189 ## get clean range database json from PForest WS :
184 my $pf_json = $opfws->db_pforest_get_clean_range($PF_CONF->{PF_WS_URL}, $PF_CONF->{PF_REST_QUERY_CLEAN_RANGE}, $$min_delta, $$max_delta, $mode) ; 190 my $pf_json = $opfws->db_pforest_get_clean_range($PF_CONF->{PF_WS_URL}, $PF_CONF->{PF_REST_QUERY_CLEAN_RANGE}, $$min_delta, $$max_delta, $mode) ;
191 } 197 }
192 } ## End IF PForest param 198 } ## End IF PForest param
193 else { croak "Can't work : missing a PForest REST paramater\n" ; } 199 else { croak "Can't work : missing a PForest REST paramater\n" ; }
194 } ## End ELSIF PForest 200 } ## End ELSIF PForest
195 elsif ( ( defined $CONF->{'INHOUSE_BANK'} ) and ( $CONF->{'INHOUSE_BANK'} ne '' ) ) { 201 elsif ( ( defined $CONF->{'INHOUSE_BANK'} ) and ( $CONF->{'INHOUSE_BANK'} ne '' ) ) {
196 $col_mzdb = 4; 202
197 $bank_name = "internal_bank"; 203 $col_mzdb = $CONF->{'INHOUSE_BANK_MZ_COLUMN'} ;
204 $bank_name = $CONF->{'INHOUSE_BANK_NAME'} ;
205 my $html_file = $binPath.'/'.$CONF->{'INHOUSE_BANK'} ;
206
198 if ( (defined $col_rt) or (defined $manual_rt) or (defined $rt_delta) ) { croak "No retention time in the internal bank, please use your own bank\n" ; } 207 if ( (defined $col_rt) or (defined $manual_rt) or (defined $rt_delta) ) { croak "No retention time in the internal bank, please use your own bank\n" ; }
199 my $html_file = $binPath.'/'.$CONF->{'INHOUSE_BANK'} ; 208
200 if ( -e $html_file ) { 209 if ( -e $html_file ) {
201 ## parse all csv for later : output csv build 210 ## parse all csv for later : output csv build
202 my $ocsv_input = lib::csv->new() ; 211 my $ocsv_input = lib::csv->new() ;
203 my $complete_csv = $ocsv_input->get_csv_object( "\t" ) ; 212 my $complete_csv = $ocsv_input->get_csv_object( "\t" ) ;
204 ($complete_bank, $bank_head)= $oBih->parse_bank_interest($complete_csv, \$html_file, $col_mzdb-1) ; 213 ($complete_bank, $bank_head)= $oBih->parse_bank_interest($complete_csv, \$html_file, $col_mzdb-1) ;
238 my @sort_masses_bank = sort { $a <=> $b } keys(%$bank); 247 my @sort_masses_bank = sort { $a <=> $b } keys(%$bank);
239 248
240 my $compt_masses = 0; 249 my $compt_masses = 0;
241 foreach my $mz (@$masses) { 250 foreach my $mz (@$masses) {
242 $compt_masses++; 251 $compt_masses++;
243 my ($MZmessage) = $oBih->check_interval($mz, 0, 10000) ; 252 my ($MZmessage) = $oBih->check_interval($mz, $CONF->{'BANK_MZ_MIN'}, $CONF->{'BANK_MZ_MAX'}) ;
244 if ( $MZmessage eq 'OK' ){ 253 if ( $MZmessage eq 'OK' ){
245 my ( $min, $max ) = $oBih->mz_delta_conversion(\$mz, \$mz_delta_type, \$mz_delta) ; 254 my ( $min, $max ) = $oBih->mz_delta_conversion(\$mz, \$mz_delta_type, \$mz_delta) ;
246 255
247 my ($marj_inf) = $oBih->dichotomi_search(\@sort_masses_bank, $min) ; 256 my ($marj_inf) = $oBih->dichotomi_search(\@sort_masses_bank, $min) ;
248 my ($marj_sup) = $oBih->dichotomi_search(\@sort_masses_bank, $max) ; 257 my ($marj_sup) = $oBih->dichotomi_search(\@sort_masses_bank, $max) ;
251 if($$marj_inf != $$marj_sup){ 260 if($$marj_inf != $$marj_sup){
252 if ($$marj_inf == -1){ $$marj_inf=0; } 261 if ($$marj_inf == -1){ $$marj_inf=0; }
253 for (my $i=$$marj_inf; $i<$$marj_sup; $i++){ 262 for (my $i=$$marj_inf; $i<$$marj_sup; $i++){
254 my $bank_tmp = $$bank{$sort_masses_bank[$i]}; 263 my $bank_tmp = $$bank{$sort_masses_bank[$i]};
255 if ( ( defined $rt ) and ( $rt ne "" ) ) { 264 if ( ( defined $rt ) and ( $rt ne "" ) ) {
256 my ($RTmessage) = $oBih->check_interval($$rt[$compt_masses-1], 0, 60) ; 265 my ($RTmessage) = $oBih->check_interval($$rt[$compt_masses-1], $CONF->{'BANK_RT_MIN'}, $CONF->{'BANK_RT_MAX'}) ;
257 if ( $RTmessage eq 'OK' ){ 266 if ( $RTmessage eq 'OK' ){
258 my $bank_rt = []; 267 my $bank_rt = [];
259 for(my $nb_rt=0; $nb_rt<=$#$bank_tmp; $nb_rt++){ 268 for(my $nb_rt=0; $nb_rt<=$#$bank_tmp; $nb_rt++){
260 my ($RTbank) = $oBih->check_interval($bank_tmp->[$nb_rt]->[$rtdb-1], 0, 60) ; 269 my ($RTbank) = $oBih->check_interval($bank_tmp->[$nb_rt]->[$rtdb-1], $CONF->{'BANK_RT_MIN'}, $CONF->{'BANK_RT_MAX'}) ;
261 my ($RTsearch) = $oBih->check_interval($bank_tmp->[$nb_rt]->[$rtdb-1], $$rt[$compt_masses-1]-$rt_delta, $$rt[$compt_masses-1]+$rt_delta) ; 270 my ($RTsearch) = $oBih->check_interval($bank_tmp->[$nb_rt]->[$rtdb-1], $$rt[$compt_masses-1]-$rt_delta, $$rt[$compt_masses-1]+$rt_delta) ;
262 if ( ($RTbank eq 'OK') and ($RTsearch eq 'OK') ){ 271 if ( ($RTbank eq 'OK') and ($RTsearch eq 'OK') ){
263 push (@$result, $bank_tmp->[$nb_rt]) ; 272 push (@$result, $bank_tmp->[$nb_rt]) ;
264 } 273 }
265 elsif ($RTbank ne 'OK'){ croak "At least one retention time in bank is not valid : $RTbank\n" ; } 274 elsif ($RTbank ne 'OK'){ croak "At least one retention time in bank is not valid : $RTbank\n" ; }
280 else{ 289 else{
281 my $result = []; 290 my $result = [];
282 my $compt_masses = 0; 291 my $compt_masses = 0;
283 foreach my $mz (@$masses) { 292 foreach my $mz (@$masses) {
284 $compt_masses++; 293 $compt_masses++;
285 my ($MZmessage) = $oBih->check_interval($mz, 0, 10000) ; 294 my ($MZmessage) = $oBih->check_interval($mz, $CONF->{'BANK_MZ_MIN'}, $CONF->{'BANK_MZ_MAX'}) ;
286 if ( $MZmessage eq 'OK' ){ 295 if ( $MZmessage eq 'OK' ){
287 if ($$bank{$mz}){ 296 if ($$bank{$mz}){
288 my $bank_tmp = $$bank{$mz}; 297 my $bank_tmp = $$bank{$mz};
289 if ( ( defined $rt ) and ( $rt ne "" ) ) { 298 if ( ( defined $rt ) and ( $rt ne "" ) ) {
290 my ($RTmessage) = $oBih->check_interval($$rt[$compt_masses-1], 0, 60) ; 299 my ($RTmessage) = $oBih->check_interval($$rt[$compt_masses-1], $CONF->{'BANK_RT_MIN'}, $CONF->{'BANK_RT_MAX'}) ;
291 if ( $RTmessage eq 'OK' ){my $bank_rt = []; 300 if ( $RTmessage eq 'OK' ){my $bank_rt = [];
292 for(my $nb_rt=0; $nb_rt<=$#$bank_tmp; $nb_rt++){ 301 for(my $nb_rt=0; $nb_rt<=$#$bank_tmp; $nb_rt++){
293 my ($RTbank) = $oBih->check_interval($bank_tmp->[$nb_rt]->[$rtdb-1], 0, 60) ; 302 my ($RTbank) = $oBih->check_interval($bank_tmp->[$nb_rt]->[$rtdb-1], $CONF->{'BANK_RT_MIN'}, $CONF->{'BANK_RT_MAX'}) ;
294 my ($RTsearch) = $oBih->check_interval($bank_tmp->[$nb_rt]->[$rtdb-1], $$rt[$compt_masses-1]-$rt_delta, $$rt[$compt_masses-1]+$rt_delta) ; 303 my ($RTsearch) = $oBih->check_interval($bank_tmp->[$nb_rt]->[$rtdb-1], $$rt[$compt_masses-1]-$rt_delta, $$rt[$compt_masses-1]+$rt_delta) ;
295 if ( ($RTbank eq 'OK') and ($RTsearch eq 'OK') ){ 304 if ( ($RTbank eq 'OK') and ($RTsearch eq 'OK') ){
296 push (@$bank_rt, $bank_tmp->[$nb_rt]) ; 305 push (@$bank_rt, $bank_tmp->[$nb_rt]) ;
297 } 306 }
298 elsif ($RTbank ne 'OK'){ croak "At least one retention time in bank is not valid : $RTbank\n" ; } 307 elsif ($RTbank ne 'OK'){ croak "At least one retention time in bank is not valid : $RTbank\n" ; }
315 else { 324 else {
316 croak "Can't work : missing paramaters (list of ids, masses, delta, ionization, characterization or inhouse_bank)\n" ; 325 croak "Can't work : missing paramaters (list of ids, masses, delta, ionization, characterization or inhouse_bank)\n" ;
317 } ## end ELSE 326 } ## end ELSE
318 327
319 328
320 if ( ( defined $rt ) and ( $rt ne "" ) ) { unshift (@$bank_head, ("DELTA_mass(".$mz_delta.$mz_delta_type.")", "MASS_Result", "DELTA_RT(".$rt_delta.")", "RT_Result")) ; } 329 if ( ( defined $rt ) and ( $rt ne "" ) ) { unshift (@$bank_head, ("MzDelta_Query-Bank(".$mz_delta.$mz_delta_type.")", "MzBank", "RtQuery", "RtDelta_Query-Bank(".$rt_delta."min.)", "RtBank")) ; }
321 else { unshift (@$bank_head, ("DELTA_mass(".$mz_delta.$mz_delta_type.")", "MASS_Result")) ; } 330 else { unshift (@$bank_head, ("MzDelta_Query-Bank(".$mz_delta.$mz_delta_type.")", "MzBank")) ; }
322 331
323 332
324 ## -------------- Produce JSON output ------------------ : 333 ## -------------- Produce JSON output ------------------ :
325 if ( ( defined $out_json ) and ( defined $results ) ) { 334 if ( ( defined $out_json ) and ( defined $results ) ) {
326 open(JSON, '>:utf8', "$out_json") or die "Cant' create the json file\n" ; 335 open(JSON, '>:utf8', "$out_json") or die "Cant' create the json file\n" ;
363 my $ocsv = lib::bih::new() ; 372 my $ocsv = lib::bih::new() ;
364 my $lm_matrix = undef ; 373 my $lm_matrix = undef ;
365 if ( ( $header_choice eq 'yes' ) and ( defined $nbline_header ) and ( $nbline_header > 0 )) { 374 if ( ( $header_choice eq 'yes' ) and ( defined $nbline_header ) and ( $nbline_header > 0 )) {
366 my $header = join("$sep", @$bank_head); 375 my $header = join("$sep", @$bank_head);
367 my $out_head = "BiH_".$bank_name."_(".$header.")"; 376 my $out_head = "BiH_".$bank_name."_(".$header.")";
368 $lm_matrix = $ocsv->set_lm_matrix_object($out_head, $masses, $col_mzdb, $results, $rt, $rtdb, $bank_head, $sep ) ; 377 $lm_matrix = $ocsv->set_bih_matrix_object($out_head, $masses, $col_mzdb, $results, $rt, $rtdb, $bank_head, $sep ) ;
369 $lm_matrix = $ocsv->add_lm_matrix_to_input_matrix($complete_rows, $lm_matrix, $nbline_header-1) ; 378 $lm_matrix = $ocsv->add_bih_matrix_to_input_matrix($complete_rows, $lm_matrix, $nbline_header-1) ;
370 } 379 }
371 elsif ( ( $header_choice eq 'no' ) or ( $nbline_header <= 0 )) { 380 elsif ( ( $header_choice eq 'no' ) or ( $nbline_header <= 0 )) {
372 $lm_matrix = $ocsv->set_lm_matrix_object(undef, $masses, $col_mzdb, $results, $rt, $rtdb, $bank_head, $sep ) ; 381 $lm_matrix = $ocsv->set_bih_matrix_object(undef, $masses, $col_mzdb, $results, $rt, $rtdb, $bank_head, $sep ) ;
373 $lm_matrix = $ocsv->add_lm_matrix_to_input_matrix($complete_rows, $lm_matrix, 0) ; 382 $lm_matrix = $ocsv->add_bih_matrix_to_input_matrix($complete_rows, $lm_matrix, 0) ;
374 } 383 }
375 else { croak "The number of header line is not identifiable\n" ; } 384 else { croak "The number of header line is not identifiable\n" ; }
376 $ocsv->write_csv_skel(\$out_tab, $lm_matrix) ; 385 $ocsv->write_csv_skel(\$out_tab, $lm_matrix) ;
377 } 386 }
378 else { croak "Can't create a tabular output for BiH : your output file is not defined\n" ; } 387 else { croak "Can't create a tabular output for BiH : your output file is not defined\n" ; }
413 print STDERR " 422 print STDERR "
414 bank_inhouse 423 bank_inhouse
415 424
416 # bank_inhouse is a script to query a in house bank (file) using chemical mass and return a list of common names. 425 # bank_inhouse is a script to query a in house bank (file) using chemical mass and return a list of common names.
417 # Input : mass or list of masses 426 # Input : mass or list of masses
418 # Author : Marion LANDI and Franck Giacomoni (for the Rest part) 427 # Author : Marion LANDI and Franck Giacomoni (as maintainers)
419 # Email : franck.giacomoni\@clermont.inra.fr 428 # Email : franck.giacomoni\@inra.fr
420 # Version : 1.1 429 # Version : 1.2.1
421 # Created : 15/10/2014 430 # Created : 15/10/2014
422 # Updated : 15/12/2015 431 # Updated : 24/01/2019
423 USAGE : 432 USAGE :
424 bank_inhouse.pl -masse [mass] 433 bank_inhouse.pl -masse [mass]
425 -tolerance [Delta of mass (Da)] -mode [Ionization type: positive/negative/neutral] 434 -tolerance [Delta of mass (Da)] -mode [Ionization type: positive/negative/neutral]
426 -tissues [Restricted to certain tissues] -- optionnal 435 -tissues [Restricted to certain tissues] -- optionnal
427 -bank_in [file for in personal house bank] -- optionnal 436 -bank_in [file for in personal house bank] -- optionnal
477 =back 486 =back
478 487
479 =head1 AUTHOR 488 =head1 AUTHOR
480 489
481 Marion LANDI E<lt>marion.landi@clermont.inra.frE<gt> 490 Marion LANDI E<lt>marion.landi@clermont.inra.frE<gt>
491 Franck Giacomoni E<lt>franck.giacomoni@inra.frE<gt>
482 492
483 =head1 LICENSE 493 =head1 LICENSE
484 494
485 This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. 495 This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
486 496
487 =head1 VERSION 497 =head1 VERSION
488 498
489 version 1 : 15 / 10 / 2014 499 version 1.1.1 : 15 / 10 / 2014
490 500
491 version 2 : 21 / 11 / 2014 501 version 1.1.2 : 21 / 11 / 2014
502
503 version 1.1.3 : 13 / 11 / 2018
504
505 version 1.2.0 : 12 / 12 / 2018
506
507 version 1.2.1 : 24 / 01 / 2019 - new version without the REST methods
508
492 509
493 =cut 510 =cut