| 0 | 1 ## Copyright (C) 2012 Erik Kjellson | 
|  | 2 ## | 
|  | 3 ## This file is part of Octave. | 
|  | 4 ## | 
|  | 5 ## Octave is free software; you can redistribute it and/or modify it | 
|  | 6 ## under the terms of the GNU General Public License as published by | 
|  | 7 ## the Free Software Foundation; either version 3 of the License, or (at | 
|  | 8 ## your option) any later version. | 
|  | 9 ## | 
|  | 10 ## Octave is distributed in the hope that it will be useful, but | 
|  | 11 ## WITHOUT ANY WARRANTY; without even the implied warranty of | 
|  | 12 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
|  | 13 ## General Public License for more details. | 
|  | 14 ## | 
|  | 15 ## You should have received a copy of the GNU General Public License | 
|  | 16 ## along with Octave; see the file COPYING.  If not, see | 
|  | 17 ## <http://www.gnu.org/licenses/>. | 
|  | 18 | 
|  | 19 ## -*- texinfo -*- | 
|  | 20 ## @deftypefn  {Function File} {@var{A} =} importdata (@var{fname}) | 
|  | 21 ## @deftypefnx {Function File} {@var{A} =} importdata (@var{fname}, @var{delimiter}) | 
|  | 22 ## @deftypefnx {Function File} {@var{A} =} importdata (@var{fname}, @var{delimiter},  @var{header_rows}) | 
|  | 23 ## @deftypefnx {Function File} {[@var{A}, @var{delimiter}] =} importdata (...) | 
|  | 24 ## @deftypefnx {Function File} {[@var{A}, @var{delimiter}, @var{header_rows}] =} importdata (...) | 
|  | 25 ## Importing data from file. | 
|  | 26 ## | 
|  | 27 ## Importing the contents of file @var{fname} into workspace. | 
|  | 28 ## | 
|  | 29 ## Input parameters: | 
|  | 30 ## @itemize | 
|  | 31 ## @item @var{fname} | 
|  | 32 ## The file name for the file to import. | 
|  | 33 ## | 
|  | 34 ## @item @var{delimiter} | 
|  | 35 ## The character separating columns of data. Use @code{\t} for tab. | 
|  | 36 ## (Only valid for ascii files) | 
|  | 37 ## | 
|  | 38 ## @item @var{header_rows} | 
|  | 39 ## Number of header rows before the data begins. (Only valid for ascii files) | 
|  | 40 ## @end itemize | 
|  | 41 ## | 
|  | 42 ## Different file types are supported: | 
|  | 43 ## @itemize | 
|  | 44 ## @item Ascii table | 
|  | 45 ## | 
|  | 46 ## Importing ascii table using the specified number of header rows and | 
|  | 47 ## the specified delimiter. | 
|  | 48 ## | 
|  | 49 ## @item Image file | 
|  | 50 ## | 
|  | 51 ## @item @sc{Matlab} file | 
|  | 52 ## | 
|  | 53 ## @item Spreadsheet files (depending on external software) | 
|  | 54 ## | 
|  | 55 ## @item Wav file | 
|  | 56 ## | 
|  | 57 ## @end itemize | 
|  | 58 ## | 
|  | 59 ## @seealso{textscan, dlmread, csvread, load} | 
|  | 60 ## @end deftypefn | 
|  | 61 | 
|  | 62 ## Author: Erik Kjellson <erikiiofph7@users.sourceforge.net> | 
|  | 63 | 
function [output, delimiter, header_rows] = importdata (varargin)

  ## Entry point: validate the arguments, pick a reader from the file
  ## extension and post-process the result.
  ##
  ## varargin{1} - file name (string, required)
  ## varargin{2} - column delimiter for ascii files: one character or the
  ##               two-character sequence '\t' (optional)
  ## varargin{3} - number of header rows for ascii files, a non-negative
  ##               integer (optional)
  ##
  ## Returns the imported data together with the delimiter and header row
  ## count.  The image, .mat and wav branches set the delimiter to NaN and
  ## the header row count to 0.

  ## Default values
  fname       = "";
  delimiter   = "";
  header_rows = -1;

  ##########

  ## Check input arguments

  if (nargin < 1)
    print_usage ();
  elseif (nargin > 3)
    ## Reject surplus arguments before doing any other work.
    error ("importdata: too many input arguments");
  endif

  fname = varargin{1};
  ## Check that the file name really is a string
  if (! ischar (fname))
    error ("importdata: file name needs to be a string");
  endif
  if (strcmpi (fname, "-pastespecial"))
    error ("importdata: option -pastespecial not implemented");
  endif

  if (nargin > 1)
    delimiter = varargin{2};
    ## Check that the delimiter really is a string
    if (! ischar (delimiter))
      error ("importdata: delimiter needs to be a character");
    endif
    ## The only multi-character delimiter accepted is the literal '\t'.
    if (length (delimiter) > 1 && ! strcmpi (delimiter, "\\t"))
      error ("importdata: delimiter cannot be longer than 1 character");
    endif
    ## A single backslash is handed on in doubled (escaped) form.
    if (strcmpi (delimiter, "\\"))
      delimiter = "\\\\";
    endif
  endif

  if (nargin > 2)
    header_rows = varargin{3};
    ## Enforce what the error message promises: one finite, real,
    ## non-negative integer.  The type tests come first so that the
    ## numeric comparisons are only evaluated on a real scalar.
    is_valid = (isnumeric (header_rows) && isscalar (header_rows) ...
                && isreal (header_rows));
    is_valid = (is_valid && isfinite (header_rows) && header_rows >= 0 ...
                && header_rows == fix (header_rows));
    if (! is_valid)
      error ("importdata: number of header rows needs to be an integer number >= 0");
    endif
  endif

  ##########

  ## Check file format
  ## Get the extension from the file name.  Only three outputs are
  ## requested: fileparts no longer provides a fourth (version) output.
  [fdir, fbase, fileExt] = fileparts (fname);
  ## Make sure file extension is in lower case.
  fileExt = lower (fileExt);

  switch (fileExt)
    case {".au", ".snd", ".avi"}
      error ("importdata: not implemented for file format %s", fileExt);
    case {".bmp", ".cur", ".gif", ".hdf", ".ico", ".jpe", ".jpeg", ".jpg", ...
          ".pbm", ".pcx", ".pgm", ".png", ".pnm", ".ppm", ".ras", ...
          ".tif", ".tiff", ".xwd"}
      delimiter   = NaN;
      header_rows = 0;
      [output.cdata, output.colormap, output.alpha] = imread (fname);
    case ".mat"
      delimiter   = NaN;
      header_rows = 0;
      output = load (fname);
    case {".wk1", ".xls", ".xlsx", ".dbf", ".pxl"}
      ## If there's no Excel file support simply fall back to unimplemented.m
      output = xlsread (fname);
    case {".ods", ".sxc", ".fods", ".uos", ".xml"}
      ## unimplemented.m only knows ML functions; odsread isn't one but is in OF
      try
        output = odsread (fname);
      catch
        ## Fall back to unimplemented.m.
        output = xlsread (fname);
      end_try_catch
    case {".wav", ".wave"}
      delimiter   = NaN;
      header_rows = 0;
      [output.data, output.fs] = wavread (fname);
    otherwise
      ## Assume the file is in ascii format.
      [output, delimiter, header_rows] = ...
          importdata_ascii (fname, delimiter, header_rows);
  endswitch

  ## If there are any empty fields in the output structure, then remove them
  if (isstruct (output) && isscalar (output))
    names = fieldnames (output);
    for i = 1:numel (names)
      if (isempty (output.(names{i})))
        output = rmfield (output, names{i});
      endif
    endfor

    ## If only one field is left, replace the structure with the field,
    ## i.e. output = output.onlyFieldLeft
    names = fieldnames (output);
    if (numel (names) == 1)
      output = output.(names{1});
    endif
  endif

endfunction
|  | 179 | 
|  | 180 | 
|  | 181 ######################################## | 
|  | 182 | 
## Read a delimited text file and sort its contents into numeric data and
## text.
##
## Inputs:
##   fname       - name of the file to read.
##   delimiter   - column separator.  Either one character, the
##                 two-character sequence '\t' (meaning tab), or a doubled
##                 backslash (meaning one backslash, the form in which
##                 importdata passes it).  It is matched literally.
##   header_rows - number of leading header rows.  A negative value means
##                 "count the leading rows that do not contain the
##                 delimiter".
##
## Outputs:
##   output      - struct with the fields data, textdata, rowheaders and
##                 colheaders (in that order).
##   delimiter   - the delimiter as given, except that '\t' is returned as
##                 a real tab character.
##   header_rows - the number of header rows that was used.
function [output, delimiter, header_rows] = ...
      importdata_ascii (fname, delimiter, header_rows)

  ## Define the fields in the output structure so that the order will be
  ## correct.
  output.data       = [];
  output.textdata   = [];
  output.rowheaders = [];
  output.colheaders = [];

  ## FIXME: guess delimiter, if it isn't defined
  if (isempty (delimiter))
    error ("importdata: Guessing delimiter is not implemented yet, you have to specify it.");
  endif

  ## Translate the delimiter into the literal text to search for.
  is_tab = strcmpi (delimiter, '\t');
  if (is_tab)
    sep = "\t";
  elseif (strcmp (delimiter, "\\\\"))
    sep = "\\";
  else
    sep = delimiter;
  endif
  ## If space is the delimiter, then multiple spaces count as ONE
  ## delimiter and leading spaces are ignored.
  collapse = strcmp (sep, " ");

  ## Read the file row by row.
  fid = fopen (fname, "r");
  if (fid < 0)
    error ("importdata: unable to open file '%s'", fname);
  endif
  rows = {};
  unwind_protect
    while (true)
      txt = fgetl (fid);
      if (! ischar (txt))
        ## fgetl returns -1 (not a string) once the end of file is reached.
        break;
      endif
      ## Rows of CRLF files still carry the CR; drop it.
      if (! isempty (txt) && txt(end) == "\r")
        txt(end) = [];
      endif
      rows{end+1} = txt;
    endwhile
  unwind_protect_cleanup
    ## Close the file even if reading failed.
    fclose (fid);
  end_unwind_protect

  ## FIXME: A more intelligent way to count number of header rows.  This
  ## is needed e.g. when delimiter=' ' and the header contains spaces...

  ## If number of header rows is undefined, then count the leading rows
  ## that do not contain the delimiter.  Assume that the header can't
  ## contain any delimiter.
  if (header_rows < 0)
    header_rows = 0;
    for i = 1:numel (rows)
      if (isempty (strfind (rows{i}, sep)))
        header_rows++;
      else
        ## Data part has begun, no more header rows can be found.
        break;
      endif
    endfor
  elseif (header_rows > numel (rows))
    error ("importdata: %d header rows requested but the file has only %d rows",
           header_rows, numel (rows));
  endif

  ## Put the header rows in output.textdata (as a column).
  if (header_rows > 0)
    output.textdata = rows(1:header_rows)';
  endif

  ## Everything after the header is data.
  data_rows = rows((header_rows + 1):end);
  if (collapse)
    ## strtrim also removes trailing spaces, which does not matter here.
    data_rows = cellfun (@strtrim, data_rows, "UniformOutput", false);
  endif
  ## Remove empty data rows.
  data_rows = data_rows(! cellfun (@isempty, data_rows));
  n_rows = numel (data_rows);

  ## Split every row once and count the data columns.  If rows have a
  ## different number of columns, use the greatest value.
  row_fields = cell (1, n_rows);
  data_columns = 0;
  for k = 1:n_rows
    row_fields{k} = importdata_split_row (data_rows{k}, sep, collapse);
    data_columns = max (data_columns, numel (row_fields{k}));
  endfor

  ## Go through the data and put it in either output.data or
  ## output.textdata depending on if it is numeric or not.
  output.data = NaN (n_rows, data_columns);
  has_numeric = false (1, data_columns);
  for k = 1:n_rows
    ## Only use the row if it contains anything other than spaces.
    if (! any (data_rows{k} != " "))
      continue;
    endif
    fields = row_fields{k};
    for j = 1:numel (fields)
      if (isempty (fields{j}))
        ## Missing value: leave the NaN in place.
        continue;
      endif
      value = str2double (fields{j});
      if (isnan (value))
        ## Not a number: keep the text, at its row position counted
        ## from the top of the file (header rows included).
        output.textdata{header_rows + k, j} = fields{j};
      else
        output.data(k, j) = value;
        has_numeric(j) = true;
      endif
    endfor
  endfor
  ## Drop the columns in which no number was found at all.
  output.data = output.data(:, has_numeric);

  ## Check whether rowheaders or colheaders should be used
  if ((header_rows == data_columns) && (size (output.textdata, 2) == 1))
    output.rowheaders = output.textdata;
  elseif (size (output.textdata, 2) == data_columns)
    output.colheaders = output.textdata(end,:);
  endif

  ## When delimiter = "\\t" convert it to a tab, done for Matlab compatibility.
  if (is_tab)
    delimiter = "\t";
  endif

endfunction

## Split STR at every literal occurrence of the single character SEP and
## return the pieces as a row cell array.  If COLLAPSE is true, empty
## pieces are dropped so that a run of separators acts as one separator.
function parts = importdata_split_row (str, sep, collapse)

  hits   = strfind (str, sep);
  starts = [1, hits + numel(sep)];
  stops  = [hits - 1, numel(str)];
  parts  = cell (1, numel (starts));
  for n = 1:numel (starts)
    parts{n} = str(starts(n):stops(n));
  endfor
  if (collapse)
    parts = parts(! cellfun (@isempty, parts));
  endif

endfunction
|  | 322 | 
|  | 323 | 
## File-local stand-in for regexp that forwards to Octave's built-in
## regexp.
##
## Because it carries the same name as the built-in, the regexp calls made
## by the other functions in this file are expected to resolve to this
## subfunction.  It therefore has to honour both the pattern ARG2 and the
## option ARG3 ("split" as well as "once" are used in this file).  The
## pattern '\t' needs no special treatment: the regular expression engine
## already reads it as a tab.
##
##   ARG1 - string to search
##   ARG2 - regular expression pattern
##   ARG3 - regexp option, e.g. "split" or "once" (optional)
##   RET  - first output of the built-in regexp for these arguments
function [RET] = regexp (ARG1, ARG2, ARG3)
  if (nargin < 3)
    RET = builtin ("regexp", ARG1, ARG2);
  else
    RET = builtin ("regexp", ARG1, ARG2, ARG3);
  endif
endfunction
|  | 331 | 
|  | 332 ######################################## | 
|  | 333 | 
%!test
%! ## Comma separated values
%! A = [3.1 -7.2 0; 0.012 6.5 128];
%! fn  = tempname ();
%! fid = fopen (fn, "w");
%! fputs (fid, "3.1,-7.2,0\n0.012,6.5,128");
%! fclose (fid);
%! ## Remove the temporary file even if importdata throws.
%! unwind_protect
%!   [a,d,h] = importdata (fn, ",");
%! unwind_protect_cleanup
%!   unlink (fn);
%! end_unwind_protect
%! assert (a, A);
%! assert (d, ",");
%! assert (h, 0);
|  | 346 | 
%!test
%! ## Tab separated values
%! A = [3.1 -7.2 0; 0.012 6.5 128];
%! fn  = tempname ();
%! fid = fopen (fn, "w");
%! fputs (fid, "3.1\t-7.2\t0\n0.012\t6.5\t128");
%! fclose (fid);
%! ## Remove the temporary file even if importdata throws.
%! unwind_protect
%!   [a,d,h] = importdata (fn, "\\t");
%! unwind_protect_cleanup
%!   unlink (fn);
%! end_unwind_protect
%! assert (a, A);
%! assert (d, "\t");
%! assert (h, 0);
|  | 359 | 
%!test
%! ## Space separated values, using multiple spaces to align in columns.
%! A = [3.1 -7.2 0; 0.012 6.5 128];
%! fn  = tempname ();
%! fid = fopen (fn, "w");
%! fprintf (fid, "%10.3f %10.3f %10.3f\n", A(1,:));
%! fprintf (fid, "%10.3f %10.3f %10.3f\n", A(2,:));
%! fclose (fid);
%! ## Remove the temporary file even if importdata throws.
%! unwind_protect
%!   [a,d,h] = importdata (fn, " ");
%! unwind_protect_cleanup
%!   unlink (fn);
%! end_unwind_protect
%! assert (a, A);
%! assert (d, " ");
%! assert (h, 0);
|  | 373 | 
%!test
%! ## Header
%! A.data = [3.1 -7.2 0; 0.012 6.5 128];
%! A.textdata = {"This is a header row."; ...
%!               "this row does not contain any data, but the next one does."};
%! fn  = tempname ();
%! fid = fopen (fn, "w");
%! fputs (fid, [A.textdata{1} "\n"]);
%! fputs (fid, [A.textdata{2} "\n"]);
%! fputs (fid, "3.1\t-7.2\t0\n0.012\t6.5\t128");
%! fclose (fid);
%! ## Remove the temporary file even if importdata throws.
%! unwind_protect
%!   [a,d,h] = importdata (fn, "\\t");
%! unwind_protect_cleanup
%!   unlink (fn);
%! end_unwind_protect
%! assert (a, A);
%! assert (d, "\t");
%! assert (h, 2);
|  | 390 | 
%!test
%! ## Ignore empty rows containing only spaces
%! A = [3.1 -7.2 0; 0.012 6.5 128];
%! fn  = tempname ();
%! fid = fopen (fn, "w");
%! fprintf (fid, "%10.3f %10.3f %10.3f\n", A(1,:));
%! ## Terminate the spaces-only row with a newline so that it really is a
%! ## row of its own and not a prefix of the next data row.
%! fputs (fid, "      \n");
%! fprintf (fid, "%10.3f %10.3f %10.3f\n", A(2,:));
%! fclose (fid);
%! ## Remove the temporary file even if importdata throws.
%! unwind_protect
%!   [a,d,h] = importdata (fn, " ");
%! unwind_protect_cleanup
%!   unlink (fn);
%! end_unwind_protect
%! assert (a, A);
%! assert (d, " ");
%! assert (h, 0);
|  | 405 | 
%!test
%! ## Exponentials
%! A = [3.1 -7.2 0; 0.012 6.5 128];
%! fn  = tempname ();
%! fid = fopen (fn, "w");
%! fputs (fid, "+3.1e0\t-72E-1\t0\n12e-3\t6.5\t128");
%! fclose (fid);
%! ## Remove the temporary file even if importdata throws.
%! unwind_protect
%!   [a,d,h] = importdata (fn, "\\t");
%! unwind_protect_cleanup
%!   unlink (fn);
%! end_unwind_protect
%! assert (a, A);
%! assert (d, "\t");
%! assert (h, 0);
|  | 418 | 
%!test
%! ## Missing values
%! A = [3.1 NaN 0; 0.012 6.5 128];
%! fn  = tempname ();
%! fid = fopen (fn, "w");
%! fputs (fid, "3.1\t\t0\n0.012\t6.5\t128");
%! fclose (fid);
%! ## Remove the temporary file even if importdata throws.
%! unwind_protect
%!   [a,d,h] = importdata (fn, "\\t");
%! unwind_protect_cleanup
%!   unlink (fn);
%! end_unwind_protect
%! assert (a, A);
%! assert (d, "\t");
%! assert (h, 0);
|  | 431 | 
%!test
%! ## CRLF for line breaks
%! A = [3.1 -7.2 0; 0.012 6.5 128];
%! fn  = tempname ();
%! fid = fopen (fn, "w");
%! fputs (fid, "3.1\t-7.2\t0\r\n0.012\t6.5\t128");
%! fclose (fid);
%! ## Remove the temporary file even if importdata throws.
%! unwind_protect
%!   [a,d,h] = importdata (fn, "\\t");
%! unwind_protect_cleanup
%!   unlink (fn);
%! end_unwind_protect
%! assert (a, A);
%! assert (d, "\t");
%! assert (h, 0);
|  | 444 |