comparison hmdb/wsdl_hmdb.pl @ 0:9583f9772198 draft

Init and uploaded
author fgiacomoni
date Thu, 28 Jan 2016 10:52:26 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:9583f9772198
1 #!perl
2
3 ## script : wsdl_hmdb.pl
4 #=============================================================================
5 # Included modules and versions
6 #=============================================================================
7 ## Perl modules
8 use strict ;
9 use warnings ;
10 use Carp qw (cluck croak carp) ;
11
12 use Data::Dumper ;
13 use Getopt::Long ;
14 use Text::CSV ;
15 use POSIX ;
16 use FindBin ; ## Permet de localisez le repertoire du script perl d'origine
17
18 ## Specific Modules (Home made...)
19 use lib $FindBin::Bin ;
20 my $binPath = $FindBin::Bin ;
21 use lib::hmdb qw( :ALL ) ;
22 ## PFEM Perl Modules
23 use lib::conf qw( :ALL ) ;
24 use lib::csv qw( :ALL ) ;
25
## Command-line option holders : all stay undefined until GetOptions fills them.
my $help ;
my $mass ;
my ( $masses_file, $col_id, $col_mass, $header_choice, $nbline_header ) ;
my ( $delta, $molecular_species, $out_tab, $out_html, $out_xls ) ;


#=============================================================================
#                                Manage EXCEPTIONS
#=============================================================================

GetOptions(
    'h'               => \$help,              ## print the usage text and exit
    'mass:s'          => \$mass,              ## one mass, or several separated by whitespace
    'masses:s'        => \$masses_file,       ## path to the tabular input file
    'header_choice:s' => \$header_choice,     ## does the tabular file carry a header ?
    'nblineheader:i'  => \$nbline_header,     ## number of header lines in the tabular file
    'colfactor:i'     => \$col_mass,          ## column holding the values to read in the tabular file
    'delta:f'         => \$delta,             ## delta forwarded to the HMDB query
    'mode:s'          => \$molecular_species, ## molecular species (positive/negative/neutral)
    'output|o:s'      => \$out_tab,           ## path to the tabular output (input + results)
    'view|v:s'        => \$out_html,          ## path to the HTML results view
    'outputxls:s'     => \$out_xls,           ## path to the xls-like output
) ;

#=============================================================================
#                                EXCEPTIONS
#=============================================================================
## -h was given : show the usage text (help() terminates the script).
help() if $help ;
54
55 #=============================================================================
56 # MAIN SCRIPT
57 #=============================================================================
58
59
## -------------- Conf file ------------------------ :
## Every *.cfg file located next to the script is parsed ; when several are
## present, the last one returned by the glob is the one kept in $CONF.
my ( $CONF ) = ( undef ) ;
foreach my $conf_file ( glob("$binPath/*.cfg") ) {
    my $oConf = lib::conf::new() ;
    $CONF = $oConf->as_conf($conf_file) ;
}

## Stop right away when nothing could be loaded : the rest of the script reads
## $CONF->{HMDB_LIMITS} and divides by $CONF->{HTML_ENTRIES_PER_PAGE}, so going on
## without a configuration only ends later in an obscure "division by zero".
if ( !defined $CONF ) {
    croak "Can't load the script configuration : no conf could be read from $binPath/*.cfg\n" ;
}

## -------------- HTML template file ------------------------ :
## Same rule as for the conf : the last *.tmpl file found wins.
foreach my $html_template ( glob("$binPath/*.tmpl") ) {
    $CONF->{'HTML_TEMPLATE'} = $html_template ;
}
69
70
## --------------- Global parameters ---------------- :
my ( $ids, $masses, $results ) = ( undef, undef, undef ) ;
my ( $complete_rows, $nb_pages_for_html_out ) = ( undef, 1 ) ;

## Summary line handed to the HTML writer. The options are validated further
## down, so a missing one is rendered as an empty string here instead of
## raising an "uninitialized value" warning before the real error message.
my $search_condition = sprintf(
    "Search params : Molecular specie = %s / delta (mass-to-charge ratio) = %s",
    ( defined $molecular_species ) ? $molecular_species : '',
    ( defined $delta )             ? $delta             : '',
) ;

## --------------- retrieve input data -------------- :

## manage one mass, or a whitespace-separated list of masses, given with -mass
if ( ( defined $mass ) and ( $mass ne '' ) ) {
    $masses = [ split( " ", $mass ) ] ;
}
## manage csv file containing list of masses
elsif ( ( defined $masses_file ) and ( $masses_file ne "" ) and ( -e $masses_file ) ) {
    ## parse all csv for later : output csv build
    my $ocsv_input   = lib::csv->new() ;
    my $complete_csv = $ocsv_input->get_csv_object( "\t" ) ;
    $complete_rows   = $ocsv_input->parse_csv_object($complete_csv, \$masses_file) ;

    ## parse masses ; a missing or negative header line count is read as "no header line"
    my $ocsv = lib::csv->new() ;
    my $csv  = $ocsv->get_csv_object( "\t" ) ;
    if ( ( !defined $nbline_header ) or ( $nbline_header < 0 ) ) { $nbline_header = 0 ; }
    $masses = $ocsv->get_value_from_csv_multi_header( $csv, $masses_file, $col_mass, $header_choice, $nbline_header ) ; ## retrieve mz values on csv
}
else {
    warn "[warning] Input data are missing : none mass or file of masses\n" ;
    help() ; ## prints the usage text and exits
}

## Build exactly one id per mass (mz_00001, mz_00002, ...) for both input modes.
## The file mode used to create one id more than there were masses ; $ids is
## now always an array reference, empty when no mass was read.
$ids = [ map { sprintf( "mz_0%04s", $_ ) } 1 .. scalar( @{$masses} ) ] ;
103
## ---------------- launch queries -------------------- :

## Both a strictly positive delta and a non-empty molecular species are required.
my $query_params_ok = ( ( defined $delta ) and ( $delta > 0 ) and ( defined $molecular_species ) and ( $molecular_species ne '' ) ) ;

if ( !$query_params_ok ) {
    croak "Can't work with HMDB : missing paramaters (list of masses, delta or molecular species)\n" ;
}
else {
    my $oHmdb = lib::hmdb::new() ;

    $results = [] ; # filled chunk after chunk below

    ## cut the mass list into sub-lists, using the HMDB_LIMITS value of the conf
    my $submasses = $oHmdb->extract_sub_mz_lists( $masses, $CONF->{HMDB_LIMITS} ) ;

    ## probe the hmdb server with a test query before sending the real ones
    my $status = $oHmdb->test_matches_from_hmdb_ua() ;
    $oHmdb->check_state_from_hmdb_ua($status) ; ## can kill the script execution

    foreach my $mz_chunk ( @{$submasses} ) {
        my ( $hmdb_masses, $nb_masses_to_submit ) = $oHmdb->prepare_multi_masses_query($mz_chunk) ;
        my $hmdb_pages = $oHmdb->get_matches_from_hmdb_ua( $hmdb_masses, $delta, $molecular_species ) ;
        my $chunk_hits = $oHmdb->parse_hmdb_csv_results( $hmdb_pages, $mz_chunk ) ; ## hash format result

        ## append the hits of this chunk to the global result list
        push @{$results}, @{$chunk_hits} ;
    }

    ## number of HTML pages : HTML_ENTRIES_PER_PAGE (conf file) input mz per page, rounded up
    $nb_pages_for_html_out = ceil( scalar( @{$masses} ) / $CONF->{HTML_ENTRIES_PER_PAGE} ) ;
}
137
## -------------- Produce HTML/CSV output ------------------ :

## HTML view : written only when an output path was given and results exist.
if ( ( !defined $out_html ) or ( !defined $results ) ) {
    warn "Can't create a HTML output for HMDB : no result found or your output file is not defined\n" ;
}
else {
    my $oHtml            = lib::hmdb::new() ;
    my $entries_per_page = $CONF->{HTML_ENTRIES_PER_PAGE} ;

    ## build the table body step by step : empty pages, then the mz, then their entries
    my ($tbody_object) = $oHtml->set_html_tbody_object( $nb_pages_for_html_out, $entries_per_page ) ;
    ($tbody_object) = $oHtml->add_mz_to_tbody_object( $tbody_object, $entries_per_page, $masses, $ids ) ;
    ($tbody_object) = $oHtml->add_entries_to_tbody_object( $tbody_object, $entries_per_page, $masses, $results ) ;

    ## write the page from the template and the galaxy js/css paths of the conf
    my $output_html = $oHtml->write_html_skel(
        \$out_html,
        $tbody_object,
        $nb_pages_for_html_out,
        $search_condition,
        $CONF->{'HTML_TEMPLATE'},
        $CONF->{'JS_GALAXY_PATH'},
        $CONF->{'CSS_GALAXY_PATH'},
    ) ;
}
151
## Tabular output : written only when an output path was given and results exist.
if ( ( defined $out_tab ) and ( defined $results ) ) {
    # produce a csv based on METLIN format
    my $ocsv = lib::hmdb::new() ;

    ## Pick the writer with the same priority as the input parsing : a non-empty
    ## -mass wins over -masses. Testing the file option first sent a run given
    ## both options to the file writer although the input file was never parsed
    ## ($complete_rows still undefined).
    my $mass_from_command_line = ( ( defined $mass ) and ( $mass ne '' ) ) ;

    if ( $mass_from_command_line ) {
        $ocsv->write_csv_one_mass($masses, $ids, $results, $out_tab) ;
    }
    elsif ( defined $masses_file ) {
        my $lm_matrix = undef ;
        ## an absent -header_choice is compared as an empty string (no "uninitialized" warning)
        my $header_mode = ( defined $header_choice ) ? $header_choice : '' ;

        if ( ( defined $nbline_header ) and ( $header_mode eq 'yes' ) ) {
            ## results get a 'hmdb' column title and are merged at the last header line
            $lm_matrix = $ocsv->set_hmdb_matrix_object_with_ids('hmdb', $masses, $results ) ;
            $lm_matrix = $ocsv->add_lm_matrix_to_input_matrix($complete_rows, $lm_matrix, $nbline_header-1) ;
        }
        elsif ( $header_mode eq 'no' ) {
            ## no header : results are merged from the first row, without column title
            $lm_matrix = $ocsv->set_hmdb_matrix_object_with_ids(undef, $masses, $results ) ;
            $lm_matrix = $ocsv->add_lm_matrix_to_input_matrix($complete_rows, $lm_matrix, 0) ;
        }
        ## NOTE(review): with any other -header_choice value $lm_matrix stays undefined
        ## and is passed as is to write_csv_skel (unchanged behaviour) - confirm it copes.
        $ocsv->write_csv_skel(\$out_tab, $lm_matrix) ;
    }
} ## END IF
else {
    warn "Can't create a tabular output for HMDB : no result found or your output file is not defined\n" ;
}
176
## XLS-like output : one-mass csv layout, written only when -outputxls was given
## and results exist.
if ( defined $out_xls and defined $results ) {
    my $xls_writer = lib::hmdb::new() ;
    $xls_writer->write_csv_one_mass( $masses, $ids, $results, $out_xls ) ;
}
182
183
#====================================================================================
# help : usage text, shown with the -h option or when the input options are missing
# number of arguments : 0
# Argument(s)         : none
# Output              : the usage text, printed on STDERR
# Return              : never returns - the script exits with status 1
#====================================================================================
sub help {
    my $usage = "
help of wsdl_hmdb

# wsdl_hmdb is a script to query HMDB website using mz and return a list of candidates sent by HMDB based on the ms search tool.
# Input : formula or list of formula
# Author : Franck Giacomoni and Marion Landi
# Email : fgiacomoni\@clermont.inra.fr
# Version : 1.4
# Created : 08/07/2012
# Updated : 21/01/2016
USAGE :
wsdl_hmdb.pl -mass [one mass or a string list of exact masses] -delta [mz delta] -mode [molecular species: positive|negative|neutral] -output [output tabular file] -view [output html file]

or
wsdl_hmdb.pl -masses [an input file of mzs] -colfactor [col of mz] -header_choice [yes|no] -nblineheader [nb of lines containing file header : 0-n]
-delta [mz delta] -mode [molecular species: positive|negative|neutral] -output [output tabular file] -view [output html file]

or
wsdl_hmdb.pl -h for help

";
    print STDERR $usage ;
    exit(1) ;
}
214
215 ## END of script - F Giacomoni
216
217 __END__
218
219 =head1 NAME
220
221 wsdl_hmdb.pl -- script to query HMDB website using mz and return a list of candidates sent by HMDB based on the ms search tool.
222
223 =head1 USAGE
224
225 wsdl_hmdb.pl -mass [one mass or a string list of exact masses] -delta [mz delta] -mode [molecular species: positive|negative|neutral] -output [output tabular file] -view [output html file]
226
227 or
228 wsdl_hmdb.pl -masses [an input file of mzs] -colfactor [col of mz] -header_choice [yes|no] -nblineheader [nb of lines containing file header : 0-n]
229 -delta [mz delta] -mode [molecular species: positive|negative|neutral] -output [output tabular file] -view [output html file]
230
231 =head1 SYNOPSIS
232
233 This script manages batch queries on HMDB server.
234
235 =head1 DESCRIPTION
236
237 This main program is a script to query HMDB website using mz and return a list of candidates sent by HMDB based on the ms search tool.
238
239 =over 4
240
241 =item B<function01>
242
243 =item B<function02>
244
245 =back
246
247 =head1 AUTHOR
248
249 Franck Giacomoni E<lt>franck.giacomoni@clermont.inra.frE<gt>
250
251 =head1 LICENSE
252
253 This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
254
255 =head1 VERSION
256
257 version 1.0 : 06 / 06 / 2013
258
259 version 1.2 : 27 / 01 / 2014
260
261 version 1.3 : 19 / 11 / 2014
262
263 version 1.4 : 21 / 01 / 2016 - a clean version for community
264
265 =cut