Mercurial > repos > youngkim > ezbamqc
comparison ezBAMQC/src/htslib/cram/open_trace_file.c @ 0:dfa3745e5fd8
Uploaded
| author | youngkim |
|---|---|
| date | Thu, 24 Mar 2016 17:12:52 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:dfa3745e5fd8 |
|---|---|
| 1 /* | |
| 2 Author: James Bonfield | |
| 3 | |
| 4 Copyright (c) 2000-2001 MEDICAL RESEARCH COUNCIL | |
| 5 All rights reserved | |
| 6 | |
| 7 Redistribution and use in source and binary forms, with or without | |
| 8 modification, are permitted provided that the following conditions are met: | |
| 9 | |
| 10 1. Redistributions of source code must retain the above copyright notice, | |
| 11 this list of conditions and the following disclaimer. | |
| 12 | |
| 13 2. Redistributions in binary form must reproduce the above copyright notice, | |
| 14 this list of conditions and the following disclaimer in the documentation | |
| 15 and/or other materials provided with the distribution. | |
| 16 | |
| 17 3. Neither the name of the MEDICAL RESEARCH COUNCIL, THE LABORATORY OF | |
| 18 MOLECULAR BIOLOGY nor the names of its contributors may be used to endorse or | |
| 19 promote products derived from this software without specific prior written | |
| 20 permission. | |
| 21 | |
| 22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | |
| 23 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
| 24 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
| 25 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR | |
| 26 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | |
| 27 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
| 28 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON | |
| 29 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
| 30 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
| 31 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 32 */ | |
| 33 | |
| 34 /* | |
| 35 Copyright (c) 2008, 2009, 2013, 2014 Genome Research Ltd. | |
| 36 Author: James Bonfield <jkb@sanger.ac.uk> | |
| 37 | |
| 38 Redistribution and use in source and binary forms, with or without | |
| 39 modification, are permitted provided that the following conditions are met: | |
| 40 | |
| 41 1. Redistributions of source code must retain the above copyright notice, | |
| 42 this list of conditions and the following disclaimer. | |
| 43 | |
| 44 2. Redistributions in binary form must reproduce the above copyright notice, | |
| 45 this list of conditions and the following disclaimer in the documentation | |
| 46 and/or other materials provided with the distribution. | |
| 47 | |
| 48 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger | |
| 49 Institute nor the names of its contributors may be used to endorse or promote | |
| 50 products derived from this software without specific prior written permission. | |
| 51 | |
| 52 THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND | |
| 53 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
| 54 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
| 55 DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE | |
| 56 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 57 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
| 58 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |
| 59 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
| 60 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 61 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 62 */ | |
| 63 | |
| 64 #include <stdlib.h> | |
| 65 #include <stdio.h> | |
| 66 #include <string.h> | |
| 67 #include <unistd.h> | |
| 68 #include <ctype.h> | |
| 69 #include <limits.h> | |
| 70 #include <sys/types.h> | |
| 71 #include <sys/stat.h> | |
| 72 #include "cram/os.h" | |
| 73 #ifndef PATH_MAX | |
| 74 # define PATH_MAX 1024 | |
| 75 #endif | |
| 76 | |
| 77 #include "cram/open_trace_file.h" | |
| 78 #include "cram/misc.h" | |
| 79 #include "htslib/hfile.h" | |
| 80 | |
/*
 * Tokenises the search path, splitting on colons (unix) or semicolons
 * (windows), and appends "./" as the final element.
 *
 * A doubled colon "::" is an escape for one literal ':'.  When the
 * separator is ':', an element beginning with http:, ftp:, |http:, |ftp:,
 * URL=http: or URL=ftp: is recognised as a URL so that the colon after the
 * scheme and the colon between host and port are kept instead of being
 * treated as separators.  Blank path components are skipped.
 *
 * A NULL searchpath is treated as an empty string.
 *
 * Returns: A new search path with items separated by nul chars.  Two nul
 *          chars in a row represent the end of the tokenised path.
 *          Returns NULL for a failure (allocation failed).
 *
 * The returned data has been malloced.  It is up to the caller to free this
 * memory.
 */
char *tokenise_search_path(char *searchpath) {
    char *newsearch;
    size_t i, j, len;
#ifdef _WIN32
    char path_sep = ';';
#else
    char path_sep = ':';
#endif

    if (!searchpath)
        searchpath = "";

    len = strlen(searchpath);

    /*
     * Every byte stored inside the loop consumes at least one input byte,
     * so j <= len there; the extra 5 bytes hold the trailing "\0./\0\0".
     */
    newsearch = malloc(len + 5);
    if (!newsearch)
        return NULL;

    for (i = 0, j = 0; i < len; i++) {
        /* "::" => ":". Used for escaping colons in http://foo */
        if (i + 1 < len && searchpath[i] == ':' && searchpath[i+1] == ':') {
            newsearch[j++] = ':';
            i++;
            continue;
        }

        /* Handle http:// and ftp:// too without :: */
        if (path_sep == ':' &&
            (i == 0 || searchpath[i-1] == ':') &&
            (!strncmp(&searchpath[i], "http:",     5) ||
             !strncmp(&searchpath[i], "ftp:",      4) ||
             !strncmp(&searchpath[i], "|http:",    6) ||
             !strncmp(&searchpath[i], "|ftp:",     5) ||
             !strncmp(&searchpath[i], "URL=http:", 9) ||
             !strncmp(&searchpath[i], "URL=ftp:",  8))) {
            char c;

            /* Copy the prefix up to and including the scheme's ':' */
            do {
                c = searchpath[i++];
                newsearch[j++] = c;
            } while (c != ':' && i < len);

            /* Tolerate the escaped form "http:://" */
            if (i < len && searchpath[i] == ':')
                i++;

            /* Up to two '/' following the scheme */
            if (i < len && searchpath[i] == '/')
                newsearch[j++] = searchpath[i++];
            if (i < len && searchpath[i] == '/')
                newsearch[j++] = searchpath[i++];

            /* Host name: first byte unconditionally, then up to ':' or '/' */
            if (i < len) {
                do {
                    newsearch[j++] = searchpath[i++];
                } while (i < len &&
                         searchpath[i] != ':' && searchpath[i] != '/');
            }

            /* The ':' (port) or '/' (path) that ended the host name */
            if (i < len)
                newsearch[j++] = searchpath[i++];
            if (i < len && searchpath[i] == ':')
                i++;

            /*
             * The URL ran to the end of the input.  Stop here rather than
             * reading and copying bytes beyond the terminating nul.
             */
            if (i >= len)
                break;
        }

        if (searchpath[i] == path_sep) {
            /* Skip blank path components */
            if (j && newsearch[j-1] != 0)
                newsearch[j++] = 0;
        } else {
            newsearch[j++] = searchpath[i];
        }
    }

    /* Terminate the last element, then add "./" and the end marker */
    if (j)
        newsearch[j++] = 0;
    newsearch[j++] = '.';
    newsearch[j++] = '/';
    newsearch[j++] = 0;
    newsearch[j++] = 0;

    return newsearch;
}
| 164 | |
| 165 mFILE *find_file_url(char *file, char *url) { | |
| 166 char buf[8192], *cp; | |
| 167 mFILE *mf = NULL; | |
| 168 int maxlen = 8190 - strlen(file), len; | |
| 169 hFILE *hf; | |
| 170 | |
| 171 /* Expand %s for the trace name */ | |
| 172 for (cp = buf; *url && cp - buf < maxlen; url++) { | |
| 173 if (*url == '%' && *(url+1) == 's') { | |
| 174 url++; | |
| 175 cp += strlen(strcpy(cp, file)); | |
| 176 } else { | |
| 177 *cp++ = *url; | |
| 178 } | |
| 179 } | |
| 180 *cp++ = 0; | |
| 181 | |
| 182 if (!(hf = hopen(buf, "r"))) | |
| 183 return NULL; | |
| 184 | |
| 185 if (NULL == (mf = mfcreate(NULL, 0))) | |
| 186 return NULL; | |
| 187 while ((len = hread(hf, buf, 8192)) > 0) { | |
| 188 if (mfwrite(buf, len, 1, mf) <= 0) { | |
| 189 hclose_abruptly(hf); | |
| 190 mfdestroy(mf); | |
| 191 return NULL; | |
| 192 } | |
| 193 } | |
| 194 if (hclose(hf) < 0) { | |
| 195 mfdestroy(mf); | |
| 196 return NULL; | |
| 197 } | |
| 198 | |
| 199 mrewind(mf); | |
| 200 return mf; | |
| 201 } | |
| 202 | |
| 203 /* | |
| 204 * Searches for file in the directory 'dirname'. If it finds it, it opens | |
| 205 * it. This also searches for compressed versions of the file in dirname | |
| 206 * too. | |
| 207 * | |
| 208 * Returns mFILE pointer if found | |
| 209 * NULL if not | |
| 210 */ | |
| 211 static mFILE *find_file_dir(char *file, char *dirname) { | |
| 212 char path[PATH_MAX+1]; | |
| 213 size_t len = strlen(dirname); | |
| 214 char *cp; | |
| 215 | |
| 216 if (dirname[len-1] == '/') | |
| 217 len--; | |
| 218 | |
| 219 /* Special case for "./" or absolute filenames */ | |
| 220 if (*file == '/' || (len==1 && *dirname == '.')) { | |
| 221 sprintf(path, "%s", file); | |
| 222 } else { | |
| 223 /* Handle %[0-9]*s expansions, if required */ | |
| 224 char *path_end = path; | |
| 225 *path = 0; | |
| 226 while ((cp = strchr(dirname, '%'))) { | |
| 227 char *endp; | |
| 228 long l = strtol(cp+1, &endp, 10); | |
| 229 if (*endp != 's') { | |
| 230 strncpy(path_end, dirname, (endp+1)-dirname); | |
| 231 path_end += (endp+1)-dirname; | |
| 232 dirname = endp+1; | |
| 233 continue; | |
| 234 } | |
| 235 | |
| 236 strncpy(path_end, dirname, cp-dirname); | |
| 237 path_end += cp-dirname; | |
| 238 if (l) { | |
| 239 strncpy(path_end, file, l); | |
| 240 path_end += MIN(strlen(file), l); | |
| 241 file += MIN(strlen(file), l); | |
| 242 } else { | |
| 243 strcpy(path_end, file); | |
| 244 path_end += strlen(file); | |
| 245 file += strlen(file); | |
| 246 } | |
| 247 len -= (endp+1) - dirname; | |
| 248 dirname = endp+1; | |
| 249 } | |
| 250 strncpy(path_end, dirname, len); | |
| 251 path_end += MIN(strlen(dirname), len); | |
| 252 *path_end = 0; | |
| 253 if (*file) { | |
| 254 *path_end++ = '/'; | |
| 255 strcpy(path_end, file); | |
| 256 } | |
| 257 | |
| 258 //fprintf(stderr, "*PATH=\"%s\"\n", path); | |
| 259 } | |
| 260 | |
| 261 if (is_file(path)) { | |
| 262 return mfopen(path, "rb"); | |
| 263 } | |
| 264 | |
| 265 return NULL; | |
| 266 } | |
| 267 | |
| 268 /* | |
| 269 * ------------------------------------------------------------------------ | |
| 270 * Public functions below. | |
| 271 */ | |
| 272 | |
| 273 /* | |
| 274 * Opens a trace file named 'file'. This is initially looked for as a | |
| 275 * pathname relative to a file named "relative_to". This may (for | |
| 276 * example) be the name of an experiment file referencing the trace | |
| 277 * file. In this case by passing relative_to as the experiment file | |
| 278 * filename the trace file will be picked up in the same directory as | |
| 279 * the experiment file. Relative_to may be supplied as NULL. | |
| 280 * | |
| 281 * 'file' is looked for at relative_to, then the current directory, and then | |
| 282 * all of the locations listed in 'path' (which is a colon separated list). | |
| 283 * If 'path' is NULL it uses the RAWDATA environment variable instead. | |
| 284 * | |
| 285 * Returns a mFILE pointer when found. | |
| 286 * NULL otherwise. | |
| 287 */ | |
| 288 mFILE *open_path_mfile(char *file, char *path, char *relative_to) { | |
| 289 char *newsearch; | |
| 290 char *ele; | |
| 291 mFILE *fp; | |
| 292 | |
| 293 /* Use path first */ | |
| 294 if (!path) | |
| 295 path = getenv("RAWDATA"); | |
| 296 if (NULL == (newsearch = tokenise_search_path(path))) | |
| 297 return NULL; | |
| 298 | |
| 299 /* | |
| 300 * Step through the search path testing out each component. | |
| 301 * We now look through each path element treating some prefixes as | |
| 302 * special, otherwise we treat the element as a directory. | |
| 303 */ | |
| 304 for (ele = newsearch; *ele; ele += strlen(ele)+1) { | |
| 305 char *ele2; | |
| 306 | |
| 307 /* | |
| 308 * '|' prefixing a path component indicates that we do not | |
| 309 * wish to perform the compression extension searching in that | |
| 310 * location. | |
| 311 * | |
| 312 * NB: this has been removed from the htslib implementation. | |
| 313 */ | |
| 314 if (*ele == '|') { | |
| 315 ele2 = ele+1; | |
| 316 } else { | |
| 317 ele2 = ele; | |
| 318 } | |
| 319 | |
| 320 if (0 == strncmp(ele2, "URL=", 4)) { | |
| 321 if ((fp = find_file_url(file, ele2+4))) { | |
| 322 free(newsearch); | |
| 323 return fp; | |
| 324 } | |
| 325 } else if (!strncmp(ele2, "http:", 5) || | |
| 326 !strncmp(ele2, "ftp:", 4)) { | |
| 327 if ((fp = find_file_url(file, ele2))) { | |
| 328 free(newsearch); | |
| 329 return fp; | |
| 330 } | |
| 331 } else if ((fp = find_file_dir(file, ele2))) { | |
| 332 free(newsearch); | |
| 333 return fp; | |
| 334 } | |
| 335 } | |
| 336 | |
| 337 free(newsearch); | |
| 338 | |
| 339 /* Look in the same location as the incoming 'relative_to' filename */ | |
| 340 if (relative_to) { | |
| 341 char *cp; | |
| 342 char relative_path[PATH_MAX+1]; | |
| 343 strcpy(relative_path, relative_to); | |
| 344 if ((cp = strrchr(relative_path, '/'))) | |
| 345 *cp = 0; | |
| 346 if ((fp = find_file_dir(file, relative_path))) | |
| 347 return fp; | |
| 348 } | |
| 349 | |
| 350 return NULL; | |
| 351 } |
