Mercurial > repos > vipints > rdiff
comparison rDiff/src/octave/importdata.m @ 0:0f80a5141704
version 0.3 uploaded
| author | vipints |
|---|---|
| date | Thu, 14 Feb 2013 23:38:36 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:0f80a5141704 |
|---|---|
| 1 ## Copyright (C) 2012 Erik Kjellson | |
| 2 ## | |
| 3 ## This file is part of Octave. | |
| 4 ## | |
| 5 ## Octave is free software; you can redistribute it and/or modify it | |
| 6 ## under the terms of the GNU General Public License as published by | |
| 7 ## the Free Software Foundation; either version 3 of the License, or (at | |
| 8 ## your option) any later version. | |
| 9 ## | |
| 10 ## Octave is distributed in the hope that it will be useful, but | |
| 11 ## WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 12 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 13 ## General Public License for more details. | |
| 14 ## | |
| 15 ## You should have received a copy of the GNU General Public License | |
| 16 ## along with Octave; see the file COPYING. If not, see | |
| 17 ## <http://www.gnu.org/licenses/>. | |
| 18 | |
| 19 ## -*- texinfo -*- | |
| 20 ## @deftypefn {Function File} {@var{A} =} importdata (@var{fname}) | |
| 21 ## @deftypefnx {Function File} {@var{A} =} importdata (@var{fname}, @var{delimiter}) | |
| 22 ## @deftypefnx {Function File} {@var{A} =} importdata (@var{fname}, @var{delimiter}, @var{header_rows}) | |
| 23 ## @deftypefnx {Function File} {[@var{A}, @var{delimiter}] =} importdata (...) | |
| 24 ## @deftypefnx {Function File} {[@var{A}, @var{delimiter}, @var{header_rows}] =} importdata (...) | |
| 25 ## Import data from a file. | |
| 26 ## | |
| 27 ## Import the contents of the file @var{fname} into the workspace. | |
| 28 ## | |
| 29 ## Input parameters: | |
| 30 ## @itemize | |
| 31 ## @item @var{fname} | |
| 32 ## The file name for the file to import. | |
| 33 ## | |
| 34 ## @item @var{delimiter} | |
| 35 ## The character separating columns of data. Use the two-character string | |
| 36 ## @code{\t} (a backslash followed by t) for tab. (Only valid for ascii files) | |
| 37 ## | |
| 38 ## @item @var{header_rows} | |
| 39 ## Number of header rows before the data begins. (Only valid for ascii files) | |
| 40 ## @end itemize | |
| 41 ## | |
| 42 ## Different file types are supported: | |
| 43 ## @itemize | |
| 44 ## @item Ascii table | |
| 45 ## | |
| 46 ## Importing ascii table using the specified number of header rows and | |
| 47 ## the specified delimiter. | |
| 48 ## | |
| 49 ## @item Image file | |
| 50 ## | |
| 51 ## @item @sc{Matlab} file | |
| 52 ## | |
| 53 ## @item Spreadsheet files (depending on external software) | |
| 54 ## | |
| 55 ## @item Wav file | |
| 56 ## | |
| 57 ## @end itemize | |
| 58 ## | |
| 59 ## @seealso{textscan, dlmread, csvread, load} | |
| 60 ## @end deftypefn | |
| 61 | |
| 62 ## Author: Erik Kjellson <erikiiofph7@users.sourceforge.net> | |
| 63 | |
function [output, delimiter, header_rows] = importdata (varargin)

  ## Import the contents of a file, choosing the reader from the file
  ## extension.
  ##
  ## Inputs (positional):
  ##   varargin{1}  fname        file name, must be a string
  ##   varargin{2}  delimiter    optional; one character, or the two-character
  ##                             string backslash + t for tab (ascii files)
  ##   varargin{3}  header_rows  optional; non-negative integer (ascii files)
  ##
  ## Outputs:
  ##   output       imported data; a struct that ends up with exactly one
  ##                non-empty field is replaced by that field's value
  ##   delimiter    delimiter reported by the ascii reader, NaN otherwise
  ##   header_rows  header row count reported by the ascii reader, 0 otherwise

  ## Defaults: an empty delimiter and -1 header rows mean "not specified".
  delimiter = "";
  header_rows = -1;

  ##########

  ## Check input arguments

  if (nargin < 1)
    print_usage ();
  elseif (nargin > 3)
    error ("importdata: too many input arguments");
  endif

  fname = varargin{1};
  ## Check that the file name really is a string
  if (! ischar (fname))
    error ("importdata: file name needs to be a string");
  endif
  if (strcmpi (fname, "-pastespecial"))
    error ("importdata: option -pastespecial not implemented");
  endif

  if (nargin > 1)
    delimiter = varargin{2};
    ## Check that the delimiter really is a string
    if (! ischar (delimiter))
      error ("importdata: delimiter needs to be a character");
    endif
    if (length (delimiter) > 1 && ! strcmpi (delimiter, "\\t"))
      error ("importdata: delimiter cannot be longer than 1 character");
    endif
    ## A lone backslash is doubled because the ascii reader hands the
    ## delimiter on as a pattern.
    if (strcmpi (delimiter, "\\"))
      delimiter = "\\\\";
    endif
  endif

  if (nargin > 2)
    header_rows = varargin{3};
    ## The scalar test comes first so the comparisons below never see an
    ## array; the fix() test enforces what the error message promises.
    if (! isnumeric (header_rows) || ! isscalar (header_rows)
        || header_rows < 0 || header_rows != fix (header_rows))
      error ("importdata: number of header rows needs to be an integer number >= 0");
    endif
  endif

  ##########

  ## Check file format
  ## Only the extension is needed; request three outputs because fileparts
  ## no longer provides a fourth one.
  [~, ~, fileExt] = fileparts (fname);
  ## Make sure file extension is in lower case.
  fileExt = lower (fileExt);

  switch (fileExt)
    case {".au", ".snd", ".avi"}
      ## Let error() do the formatting so the extension is never re-read
      ## as a format template.
      error ("importdata: not implemented for file format %s", fileExt);
    case {".bmp", ".cur", ".gif", ".hdf", ".ico", ".jpe", ".jpeg", ".jpg", ...
          ".pbm", ".pcx", ".pgm", ".png", ".pnm", ".ppm", ".ras", ...
          ".tif", ".tiff", ".xwd"}
      delimiter = NaN;
      header_rows = 0;
      [output.cdata, output.colormap, output.alpha] = imread (fname);
    case ".mat"
      delimiter = NaN;
      header_rows = 0;
      output = load (fname);
    case {".wk1", ".xls", ".xlsx", ".dbf", ".pxl"}
      ## Report the same "not applicable" values as the other non-ascii
      ## formats instead of leaking the internal defaults.
      delimiter = NaN;
      header_rows = 0;
      ## If there's no Excel file support simply fall back to unimplemented.m
      output = xlsread (fname);
    case {".ods", ".sxc", ".fods", ".uos", ".xml"}
      delimiter = NaN;
      header_rows = 0;
      ## unimplemented.m only knows ML functions; odsread isn't one but is in OF
      try
        output = odsread (fname);
      catch
        ## Fall back to unimplemented.m.
        output = xlsread (fname);
      end_try_catch
    case {".wav", ".wave"}
      delimiter = NaN;
      header_rows = 0;
      [output.data, output.fs] = wavread (fname);
    otherwise
      ## Assume the file is in ascii format.
      [output, delimiter, header_rows] = ...
          importdata_ascii (fname, delimiter, header_rows);
  endswitch

  ## If there are any empty fields in the output structure, then remove them
  if (isstruct (output) && length (output) == 1)
    fields = fieldnames (output);
    for i = 1:length (fields)
      if (isempty (output.(fields{i})))
        output = rmfield (output, fields{i});
      endif
    endfor

    ## If only one field is left, replace the structure with the field,
    ## i.e. output = output.onlyFieldLeft
    fields = fieldnames (output);
    if (length (fields) == 1)
      output = output.(fields{1});
    endif
  endif
endfunction
| 179 | |
| 180 | |
| 181 ######################################## | |
| 182 | |
function [output, delimiter, header_rows] = ...
      importdata_ascii (fname, delimiter, header_rows)

  ## Read a delimited text file into a struct with the fields data,
  ## textdata, rowheaders and colheaders.
  ##
  ## Inputs:
  ##   fname        name of the file to read
  ##   delimiter    column separator; must not be empty
  ##   header_rows  number of leading header rows, or a negative value to
  ##                count them (rows before the first row that contains the
  ##                delimiter)
  ##
  ## Outputs:
  ##   output       struct described above
  ##   delimiter    the delimiter; the two-character string backslash + t
  ##                is returned as a real tab character
  ##   header_rows  the number of header rows that was used

  ## Define the fields in the output structure so that the order will be
  ## correct.
  output.data = [];
  output.textdata = [];
  output.rowheaders = [];
  output.colheaders = [];

  ## Read the file row by row.
  fid = fopen (fname);
  if (fid < 0)
    error ("importdata: unable to open file %s", fname);
  endif
  file_content_rows = {};
  unwind_protect
    currline = fgetl (fid);
    ## fgetl returns a non-string value (-1) once the end of file is reached.
    while (ischar (currline))
      ## Drop the carriage return that a CRLF line ending leaves behind.
      if (! isempty (currline) && currline(end) == "\r")
        currline(end) = [];
      endif
      file_content_rows{end+1} = currline;
      currline = fgetl (fid);
    endwhile
  unwind_protect_cleanup
    ## Close the file even when reading fails.
    fclose (fid);
  end_unwind_protect

  ## FIXME: guess delimiter, if it isn't defined
  if (isempty (delimiter))
    error ("importdata: Guessing delimiter is not implemented yet, you have to specify it.");
  endif

  ## FIXME: A more intelligent way to count number of header rows.  This
  ## is needed e.g. when delimiter=' ' and the header contains spaces...

  ## If number of header rows is undefined, then count the number of
  ## header rows by stepping through row by row and looking for the
  ## delimiter.  Assume that the header can't contain any delimiter.
  if (header_rows < 0)
    header_rows = 0;
    for i = 1:length (file_content_rows)
      if (! isempty (regexp (file_content_rows{i}, delimiter, "once")))
        ## Data part has begun and therefore no more header rows can be
        ## found
        break;
      endif
      header_rows++;
    endfor
  endif

  ## Put the header rows in output.textdata.
  if (header_rows > 0)
    output.textdata = file_content_rows(1:header_rows)';
  endif

  space_delimited = strcmpi (delimiter, " ");

  ## If space is the delimiter, then remove spaces in the beginning of
  ## each data row.  strtrim also removes the trailing spaces, but that
  ## doesn't really matter.
  if (space_delimited)
    for i = (header_rows+1):length (file_content_rows)
      file_content_rows{i} = strtrim (file_content_rows{i});
    endfor
  endif

  ## Remove empty data rows in one pass; header rows are always kept.
  blank_rows = cellfun (@isempty, file_content_rows);
  blank_rows(1:header_rows) = false;
  file_content_rows(blank_rows) = [];

  ## If space is the delimiter, then multiple spaces should count as ONE
  ## delimiter.
  delimiter_pattern = delimiter;
  if (space_delimited)
    delimiter_pattern = ' +';
  endif

  ## Count the number of data columns.  If there are different numbers of
  ## columns, use the greatest value.
  data_columns = 0;
  for i = (header_rows+1):length (file_content_rows)
    row_fields = regexp (file_content_rows{i}, delimiter_pattern, "split");
    data_columns = max (data_columns, length (row_fields));
  endfor

  ## Go through the data and put it in either output.data or
  ## output.textdata depending on if it is numeric or not.
  output.data = NaN (length (file_content_rows) - header_rows, data_columns);

  ## Marks the columns that received at least one numeric value.
  numeric_columns = zeros (1, data_columns);
  for i = (header_rows+1):length (file_content_rows)
    ## Only use the row if it contains anything other than spaces.
    if (any (file_content_rows{i} != " "))
      row_data = regexp (file_content_rows{i}, delimiter_pattern, "split");

      for j = 1:length (row_data)
        if (isempty (row_data{j}))
          continue;
        endif
        ## Try to convert the field to a number; if it works put it in
        ## output.data, otherwise in output.textdata.
        data_numeric = str2double (row_data{j});
        if (! isempty (data_numeric) && ! isnan (data_numeric))
          output.data(i-header_rows, j) = data_numeric;
          numeric_columns(j) = 1;
        else
          output.textdata{i,j} = row_data{j};
        endif
      endfor

    endif
  endfor
  ## Keep only the columns that hold numeric data.
  output.data = output.data(:, numeric_columns > 0);

  ## Check whether rowheaders or colheaders should be used
  if ((header_rows == data_columns) && (size (output.textdata, 2) == 1))
    output.rowheaders = output.textdata;
  elseif (size (output.textdata, 2) == data_columns)
    output.colheaders = output.textdata(end,:);
  endif

  ## When delimiter = "\\t" convert it to a tab, done for Matlab compatibility.
  if (strcmp (delimiter, '\t'))
    delimiter = "\t";
  endif

endfunction
| 322 | |
| 323 | |
function [RET] = regexp (ARG1, ARG2, ARG3)
  ## Literal-separator replacement for the regexp calls made in this file.
  ##
  ##   ARG1  the text of one row
  ##   ARG2  the separator.  Three spellings are translated: the
  ##         two-character string backslash + t means a tab, a doubled
  ##         backslash means one backslash, and ' +' means "one or more
  ##         spaces".  Anything else is taken literally.
  ##   ARG3  "once":  return the position of the first separator, or []
  ##                  when the row contains none
  ##         otherwise (including when omitted): return a cell row with the
  ##                  fields between separators
  if (nargin < 3)
    ARG3 = "split";
  endif

  collapse = false;
  if (strcmp (ARG2, '\t'))
    sep = "\t";
  elseif (strcmp (ARG2, "\\\\"))
    sep = "\\";
  elseif (strcmp (ARG2, " +"))
    sep = " ";
    collapse = true;
  else
    sep = ARG2;
  endif

  if (isempty (ARG1) || isempty (sep))
    hits = [];
  else
    hits = strfind (ARG1, sep);
  endif

  if (strcmp (ARG3, "once"))
    if (isempty (hits))
      RET = [];
    else
      RET = hits(1);
    endif
    return;
  endif

  ## Cut the text at every separator; empty fields are kept so that missing
  ## values stay in their column.
  first = [1, hits + length(sep)];
  last = [hits - 1, length(ARG1)];
  RET = cell (1, length (first));
  for k = 1:length (first)
    RET{k} = ARG1(first(k):last(k));
  endfor

  ## A run of spaces counts as one separator: the empty fields it creates
  ## between two separators are dropped, leading/trailing ones are kept.
  if (collapse && length (RET) > 2)
    interior_empty = cellfun (@isempty, RET);
    interior_empty([1, end]) = false;
    RET(interior_empty) = [];
  endif
endfunction
| 331 | |
| 332 ######################################## | |
| 333 | |
%!test
%! # Comma separated values
%! A = [3.1 -7.2 0; 0.012 6.5 128];
%! fn = tempname ();
%! fid = fopen (fn, "w");
%! fputs (fid, "3.1,-7.2,0\n0.012,6.5,128");
%! fclose (fid);
%! [a,d,h] = importdata (fn, ",");
%! unlink (fn);
%! assert (a, A);
%! assert (d, ",");
%! assert (h, 0);

%!test
%! # Tab separated values
%! A = [3.1 -7.2 0; 0.012 6.5 128];
%! fn = tempname ();
%! fid = fopen (fn, "w");
%! fputs (fid, "3.1\t-7.2\t0\n0.012\t6.5\t128");
%! fclose (fid);
%! [a,d,h] = importdata (fn, "\\t");
%! unlink (fn);
%! assert (a, A);
%! assert (d, "\t");
%! assert (h, 0);

%!test
%! # Space separated values, using multiple spaces to align in columns.
%! A = [3.1 -7.2 0; 0.012 6.5 128];
%! fn = tempname ();
%! fid = fopen (fn, "w");
%! fprintf (fid, "%10.3f %10.3f %10.3f\n", A(1,:));
%! fprintf (fid, "%10.3f %10.3f %10.3f\n", A(2,:));
%! fclose (fid);
%! [a,d,h] = importdata (fn, " ");
%! unlink (fn);
%! assert (a, A);
%! assert (d, " ");
%! assert (h, 0);

%!test
%! # Header
%! A.data = [3.1 -7.2 0; 0.012 6.5 128];
%! A.textdata = {"This is a header row."; ...
%!               "this row does not contain any data, but the next one does."};
%! fn = tempname ();
%! fid = fopen (fn, "w");
%! fputs (fid, [A.textdata{1} "\n"]);
%! fputs (fid, [A.textdata{2} "\n"]);
%! fputs (fid, "3.1\t-7.2\t0\n0.012\t6.5\t128");
%! fclose (fid);
%! [a,d,h] = importdata (fn, "\\t");
%! unlink (fn);
%! assert (a, A);
%! assert (d, "\t");
%! assert (h, 2);

%!test
%! # Ignore empty rows containing only spaces
%! A = [3.1 -7.2 0; 0.012 6.5 128];
%! fn = tempname ();
%! fid = fopen (fn, "w");
%! fprintf (fid, "%10.3f %10.3f %10.3f\n", A(1,:));
%! # End the blank row with a newline so that it really is a row of its own.
%! fputs (fid, " \n");
%! fprintf (fid, "%10.3f %10.3f %10.3f\n", A(2,:));
%! fclose (fid);
%! [a,d,h] = importdata (fn, " ");
%! unlink (fn);
%! assert (a, A);
%! assert (d, " ");
%! assert (h, 0);

%!test
%! # Exponentials
%! A = [3.1 -7.2 0; 0.012 6.5 128];
%! fn = tempname ();
%! fid = fopen (fn, "w");
%! fputs (fid, "+3.1e0\t-72E-1\t0\n12e-3\t6.5\t128");
%! fclose (fid);
%! [a,d,h] = importdata (fn, "\\t");
%! unlink (fn);
%! assert (a, A);
%! assert (d, "\t");
%! assert (h, 0);

%!test
%! # Missing values
%! A = [3.1 NaN 0; 0.012 6.5 128];
%! fn = tempname ();
%! fid = fopen (fn, "w");
%! fputs (fid, "3.1\t\t0\n0.012\t6.5\t128");
%! fclose (fid);
%! [a,d,h] = importdata (fn, "\\t");
%! unlink (fn);
%! assert (a, A);
%! assert (d, "\t");
%! assert (h, 0);

%!test
%! # CRLF for line breaks
%! A = [3.1 -7.2 0; 0.012 6.5 128];
%! fn = tempname ();
%! fid = fopen (fn, "w");
%! fputs (fid, "3.1\t-7.2\t0\r\n0.012\t6.5\t128");
%! fclose (fid);
%! [a,d,h] = importdata (fn, "\\t");
%! unlink (fn);
%! assert (a, A);
%! assert (d, "\t");
%! assert (h, 0);
| 444 |
