Mercurial > repos > youngkim > ezbamqc
comparison ezBAMQC/src/htslib/tabix.c @ 0:dfa3745e5fd8
Uploaded
| author | youngkim |
|---|---|
| date | Thu, 24 Mar 2016 17:12:52 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:dfa3745e5fd8 |
|---|---|
| 1 /* tabix.c -- Generic indexer for TAB-delimited genome position files. | |
| 2 | |
| 3 Copyright (C) 2009-2011 Broad Institute. | |
| 4 Copyright (C) 2010-2012, 2014 Genome Research Ltd. | |
| 5 | |
| 6 Author: Heng Li <lh3@sanger.ac.uk> | |
| 7 | |
| 8 Permission is hereby granted, free of charge, to any person obtaining a copy | |
| 9 of this software and associated documentation files (the "Software"), to deal | |
| 10 in the Software without restriction, including without limitation the rights | |
| 11 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
| 12 copies of the Software, and to permit persons to whom the Software is | |
| 13 furnished to do so, subject to the following conditions: | |
| 14 | |
| 15 The above copyright notice and this permission notice shall be included in | |
| 16 all copies or substantial portions of the Software. | |
| 17 | |
| 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
| 19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
| 20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
| 21 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
| 22 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
| 23 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | |
| 24 DEALINGS IN THE SOFTWARE. */ | |
| 25 | |
| 26 #include <stdio.h> | |
| 27 #include <stdlib.h> | |
| 28 #include <unistd.h> | |
| 29 #include <string.h> | |
| 30 #include <getopt.h> | |
| 31 #include <sys/types.h> | |
| 32 #include <sys/stat.h> | |
| 33 #include <errno.h> | |
| 34 #include "htslib/tbx.h" | |
| 35 #include "htslib/sam.h" | |
| 36 #include "htslib/vcf.h" | |
| 37 #include "htslib/kseq.h" | |
| 38 #include "htslib/bgzf.h" | |
| 39 #include "htslib/hts.h" | |
| 40 #include "htslib/regidx.h" | |
| 41 | |
| 42 typedef struct | |
| 43 { | |
| 44 char *regions_fname, *targets_fname; | |
| 45 int print_header, header_only; | |
| 46 } | |
| 47 args_t; | |
| 48 | |
| 49 static void error(const char *format, ...) | |
| 50 { | |
| 51 va_list ap; | |
| 52 va_start(ap, format); | |
| 53 vfprintf(stderr, format, ap); | |
| 54 va_end(ap); | |
| 55 exit(EXIT_FAILURE); | |
| 56 } | |
| 57 | |
| 58 #define IS_GFF (1<<0) | |
| 59 #define IS_BED (1<<1) | |
| 60 #define IS_SAM (1<<2) | |
| 61 #define IS_VCF (1<<3) | |
| 62 #define IS_BCF (1<<4) | |
| 63 #define IS_BAM (1<<5) | |
| 64 #define IS_CRAM (1<<6) | |
| 65 #define IS_TXT (IS_GFF|IS_BED|IS_SAM|IS_VCF) | |
| 66 | |
| 67 int file_type(const char *fname) | |
| 68 { | |
| 69 int l = strlen(fname); | |
| 70 int strcasecmp(const char *s1, const char *s2); | |
| 71 if (l>=7 && strcasecmp(fname+l-7, ".gff.gz") == 0) return IS_GFF; | |
| 72 else if (l>=7 && strcasecmp(fname+l-7, ".bed.gz") == 0) return IS_BED; | |
| 73 else if (l>=7 && strcasecmp(fname+l-7, ".sam.gz") == 0) return IS_SAM; | |
| 74 else if (l>=7 && strcasecmp(fname+l-7, ".vcf.gz") == 0) return IS_VCF; | |
| 75 else if (l>=4 && strcasecmp(fname+l-4, ".bcf") == 0) return IS_BCF; | |
| 76 else if (l>=4 && strcasecmp(fname+l-4, ".bam") == 0) return IS_BAM; | |
| 77 else if (l>=4 && strcasecmp(fname+l-5, ".cram") == 0) return IS_CRAM; | |
| 78 | |
| 79 htsFile *fp = hts_open(fname,"r"); | |
| 80 enum htsExactFormat format = fp->format.format; | |
| 81 hts_close(fp); | |
| 82 if ( format == bcf ) return IS_BCF; | |
| 83 if ( format == bam ) return IS_BAM; | |
| 84 if ( format == cram ) return IS_CRAM; | |
| 85 if ( format == vcf ) return IS_VCF; | |
| 86 | |
| 87 return 0; | |
| 88 } | |
| 89 | |
| 90 static char **parse_regions(char *regions_fname, char **argv, int argc, int *nregs) | |
| 91 { | |
| 92 kstring_t str = {0,0,0}; | |
| 93 int iseq = 0, ireg = 0; | |
| 94 char **regs = NULL; | |
| 95 *nregs = argc; | |
| 96 | |
| 97 if ( regions_fname ) | |
| 98 { | |
| 99 // improve me: this is a too heavy machinery for parsing regions... | |
| 100 | |
| 101 regidx_t *idx = regidx_init(regions_fname, NULL, NULL, 0, NULL); | |
| 102 if ( !idx ) error("Could not read %s\n", regions_fname); | |
| 103 | |
| 104 (*nregs) += regidx_nregs(idx); | |
| 105 regs = (char**) malloc(sizeof(char*)*(*nregs)); | |
| 106 | |
| 107 int nseq; | |
| 108 char **seqs = regidx_seq_names(idx, &nseq); | |
| 109 for (iseq=0; iseq<nseq; iseq++) | |
| 110 { | |
| 111 regitr_t itr; | |
| 112 regidx_overlap(idx, seqs[iseq], 0, UINT32_MAX, &itr); | |
| 113 while ( itr.i < itr.n ) | |
| 114 { | |
| 115 str.l = 0; | |
| 116 ksprintf(&str, "%s:%d-%d", seqs[iseq], REGITR_START(itr)+1, REGITR_END(itr)+1); | |
| 117 regs[ireg++] = strdup(str.s); | |
| 118 itr.i++; | |
| 119 } | |
| 120 } | |
| 121 regidx_destroy(idx); | |
| 122 } | |
| 123 free(str.s); | |
| 124 | |
| 125 if ( !ireg ) | |
| 126 { | |
| 127 if ( argc ) | |
| 128 regs = (char**) malloc(sizeof(char*)*argc); | |
| 129 else | |
| 130 { | |
| 131 regs = (char**) malloc(sizeof(char*)); | |
| 132 regs[0] = strdup("."); | |
| 133 *nregs = 1; | |
| 134 } | |
| 135 } | |
| 136 | |
| 137 for (iseq=0; iseq<argc; iseq++) regs[ireg++] = strdup(argv[iseq]); | |
| 138 return regs; | |
| 139 } | |
| 140 static int query_regions(args_t *args, char *fname, char **regs, int nregs) | |
| 141 { | |
| 142 int i; | |
| 143 htsFile *fp = hts_open(fname,"r"); | |
| 144 if ( !fp ) error("Could not read %s\n", fname); | |
| 145 enum htsExactFormat format = hts_get_format(fp)->format; | |
| 146 | |
| 147 regidx_t *reg_idx = NULL; | |
| 148 if ( args->targets_fname ) | |
| 149 { | |
| 150 reg_idx = regidx_init(args->targets_fname, NULL, NULL, 0, NULL); | |
| 151 if ( !reg_idx ) error("Could not read %s\n", args->targets_fname); | |
| 152 } | |
| 153 | |
| 154 if ( format == bcf ) | |
| 155 { | |
| 156 htsFile *out = hts_open("-","w"); | |
| 157 if ( !out ) error("Could not open stdout\n", fname); | |
| 158 hts_idx_t *idx = bcf_index_load(fname); | |
| 159 if ( !idx ) error("Could not load .csi index of %s\n", fname); | |
| 160 bcf_hdr_t *hdr = bcf_hdr_read(fp); | |
| 161 if ( !hdr ) error("Could not read the header: %s\n", fname); | |
| 162 if ( args->print_header ) | |
| 163 bcf_hdr_write(out,hdr); | |
| 164 if ( !args->header_only ) | |
| 165 { | |
| 166 bcf1_t *rec = bcf_init(); | |
| 167 for (i=0; i<nregs; i++) | |
| 168 { | |
| 169 hts_itr_t *itr = bcf_itr_querys(idx,hdr,regs[i]); | |
| 170 while ( bcf_itr_next(fp, itr, rec) >=0 ) | |
| 171 { | |
| 172 if ( reg_idx && !regidx_overlap(reg_idx, bcf_seqname(hdr,rec),rec->pos,rec->pos+rec->rlen-1, NULL) ) continue; | |
| 173 bcf_write(out,hdr,rec); | |
| 174 } | |
| 175 tbx_itr_destroy(itr); | |
| 176 } | |
| 177 bcf_destroy(rec); | |
| 178 } | |
| 179 if ( hts_close(out) ) error("hts_close returned non-zero status for stdout\n"); | |
| 180 bcf_hdr_destroy(hdr); | |
| 181 hts_idx_destroy(idx); | |
| 182 } | |
| 183 else if ( format==vcf || format==sam || format==unknown_format ) | |
| 184 { | |
| 185 tbx_t *tbx = tbx_index_load(fname); | |
| 186 if ( !tbx ) error("Could not load .tbi/.csi index of %s\n", fname); | |
| 187 kstring_t str = {0,0,0}; | |
| 188 if ( args->print_header ) | |
| 189 { | |
| 190 while ( hts_getline(fp, KS_SEP_LINE, &str) >= 0 ) | |
| 191 { | |
| 192 if ( !str.l || str.s[0]!=tbx->conf.meta_char ) break; | |
| 193 puts(str.s); | |
| 194 } | |
| 195 } | |
| 196 if ( !args->header_only ) | |
| 197 { | |
| 198 int nseq; | |
| 199 const char **seq = NULL; | |
| 200 if ( reg_idx ) seq = tbx_seqnames(tbx, &nseq); | |
| 201 for (i=0; i<nregs; i++) | |
| 202 { | |
| 203 hts_itr_t *itr = tbx_itr_querys(tbx, regs[i]); | |
| 204 if ( !itr ) continue; | |
| 205 while (tbx_itr_next(fp, tbx, itr, &str) >= 0) | |
| 206 { | |
| 207 if ( reg_idx && !regidx_overlap(reg_idx,seq[itr->curr_tid],itr->curr_beg,itr->curr_end, NULL) ) continue; | |
| 208 puts(str.s); | |
| 209 } | |
| 210 tbx_itr_destroy(itr); | |
| 211 } | |
| 212 free(seq); | |
| 213 } | |
| 214 free(str.s); | |
| 215 tbx_destroy(tbx); | |
| 216 } | |
| 217 else if ( format==bam ) | |
| 218 error("Please use \"samtools view\" for querying BAM files.\n"); | |
| 219 | |
| 220 if ( reg_idx ) regidx_destroy(reg_idx); | |
| 221 if ( hts_close(fp) ) error("hts_close returned non-zero status: %s\n", fname); | |
| 222 | |
| 223 for (i=0; i<nregs; i++) free(regs[i]); | |
| 224 free(regs); | |
| 225 return 0; | |
| 226 } | |
| 227 static int query_chroms(char *fname) | |
| 228 { | |
| 229 const char **seq; | |
| 230 int i, nseq, ftype = file_type(fname); | |
| 231 if ( ftype & IS_TXT || !ftype ) | |
| 232 { | |
| 233 tbx_t *tbx = tbx_index_load(fname); | |
| 234 if ( !tbx ) error("Could not load .tbi index of %s\n", fname); | |
| 235 seq = tbx_seqnames(tbx, &nseq); | |
| 236 for (i=0; i<nseq; i++) | |
| 237 printf("%s\n", seq[i]); | |
| 238 free(seq); | |
| 239 tbx_destroy(tbx); | |
| 240 } | |
| 241 else if ( ftype==IS_BCF ) | |
| 242 { | |
| 243 htsFile *fp = hts_open(fname,"r"); | |
| 244 if ( !fp ) error("Could not read %s\n", fname); | |
| 245 bcf_hdr_t *hdr = bcf_hdr_read(fp); | |
| 246 if ( !hdr ) error("Could not read the header: %s\n", fname); | |
| 247 hts_close(fp); | |
| 248 hts_idx_t *idx = bcf_index_load(fname); | |
| 249 if ( !idx ) error("Could not load .csi index of %s\n", fname); | |
| 250 seq = bcf_index_seqnames(idx, hdr, &nseq); | |
| 251 for (i=0; i<nseq; i++) | |
| 252 printf("%s\n", seq[i]); | |
| 253 free(seq); | |
| 254 bcf_hdr_destroy(hdr); | |
| 255 hts_idx_destroy(idx); | |
| 256 } | |
| 257 else if ( ftype==IS_BAM ) // todo: BAM | |
| 258 error("BAM: todo\n"); | |
| 259 return 0; | |
| 260 } | |
| 261 | |
| 262 int reheader_file(const char *fname, const char *header, int ftype, tbx_conf_t *conf) | |
| 263 { | |
| 264 if ( ftype & IS_TXT || !ftype ) | |
| 265 { | |
| 266 BGZF *fp = bgzf_open(fname,"r"); | |
| 267 if ( !fp || bgzf_read_block(fp) != 0 || !fp->block_length ) return -1; | |
| 268 | |
| 269 char *buffer = fp->uncompressed_block; | |
| 270 int skip_until = 0; | |
| 271 | |
| 272 // Skip the header: find out the position of the data block | |
| 273 if ( buffer[0]==conf->meta_char ) | |
| 274 { | |
| 275 skip_until = 1; | |
| 276 while (1) | |
| 277 { | |
| 278 if ( buffer[skip_until]=='\n' ) | |
| 279 { | |
| 280 skip_until++; | |
| 281 if ( skip_until>=fp->block_length ) | |
| 282 { | |
| 283 if ( bgzf_read_block(fp) != 0 || !fp->block_length ) error("FIXME: No body in the file: %s\n", fname); | |
| 284 skip_until = 0; | |
| 285 } | |
| 286 // The header has finished | |
| 287 if ( buffer[skip_until]!=conf->meta_char ) break; | |
| 288 } | |
| 289 skip_until++; | |
| 290 if ( skip_until>=fp->block_length ) | |
| 291 { | |
| 292 if (bgzf_read_block(fp) != 0 || !fp->block_length) error("FIXME: No body in the file: %s\n", fname); | |
| 293 skip_until = 0; | |
| 294 } | |
| 295 } | |
| 296 } | |
| 297 | |
| 298 // Output the new header | |
| 299 FILE *hdr = fopen(header,"r"); | |
| 300 if ( !hdr ) error("%s: %s", header,strerror(errno)); | |
| 301 int page_size = getpagesize(); | |
| 302 char *buf = valloc(page_size); | |
| 303 BGZF *bgzf_out = bgzf_dopen(fileno(stdout), "w"); | |
| 304 ssize_t nread; | |
| 305 while ( (nread=fread(buf,1,page_size-1,hdr))>0 ) | |
| 306 { | |
| 307 if ( nread<page_size-1 && buf[nread-1]!='\n' ) buf[nread++] = '\n'; | |
| 308 if (bgzf_write(bgzf_out, buf, nread) < 0) error("Error: %d\n",bgzf_out->errcode); | |
| 309 } | |
| 310 if ( fclose(hdr) ) error("close failed: %s\n", header); | |
| 311 | |
| 312 // Output all remainig data read with the header block | |
| 313 if ( fp->block_length - skip_until > 0 ) | |
| 314 { | |
| 315 if (bgzf_write(bgzf_out, buffer+skip_until, fp->block_length-skip_until) < 0) error("Error: %d\n",fp->errcode); | |
| 316 } | |
| 317 if (bgzf_flush(bgzf_out) < 0) error("Error: %d\n",bgzf_out->errcode); | |
| 318 | |
| 319 while (1) | |
| 320 { | |
| 321 nread = bgzf_raw_read(fp, buf, page_size); | |
| 322 if ( nread<=0 ) break; | |
| 323 | |
| 324 int count = bgzf_raw_write(bgzf_out, buf, nread); | |
| 325 if (count != nread) error("Write failed, wrote %d instead of %d bytes.\n", count,(int)nread); | |
| 326 } | |
| 327 if (bgzf_close(bgzf_out) < 0) error("Error: %d\n",bgzf_out->errcode); | |
| 328 if (bgzf_close(fp) < 0) error("Error: %d\n",fp->errcode); | |
| 329 } | |
| 330 else | |
| 331 error("todo: reheader BCF, BAM\n"); // BCF is difficult, records contain pointers to the header. | |
| 332 return 0; | |
| 333 } | |
| 334 | |
| 335 static int usage(void) | |
| 336 { | |
| 337 fprintf(stderr, "\n"); | |
| 338 fprintf(stderr, "Version: %s\n", hts_version()); | |
| 339 fprintf(stderr, "Usage: tabix [OPTIONS] [FILE] [REGION [...]]\n"); | |
| 340 fprintf(stderr, "\n"); | |
| 341 fprintf(stderr, "Indexing Options:\n"); | |
| 342 fprintf(stderr, " -0, --zero-based coordinates are zero-based\n"); | |
| 343 fprintf(stderr, " -b, --begin INT column number for region start [4]\n"); | |
| 344 fprintf(stderr, " -c, --comment CHAR skip comment lines starting with CHAR [null]\n"); | |
| 345 fprintf(stderr, " -C, --csi generate CSI index for VCF (default is TBI)\n"); | |
| 346 fprintf(stderr, " -e, --end INT column number for region end (if no end, set INT to -b) [5]\n"); | |
| 347 fprintf(stderr, " -f, --force overwrite existing index without asking\n"); | |
| 348 fprintf(stderr, " -m, --min-shift INT set minimal interval size for CSI indices to 2^INT [14]\n"); | |
| 349 fprintf(stderr, " -p, --preset STR gff, bed, sam, vcf\n"); | |
| 350 fprintf(stderr, " -s, --sequence INT column number for sequence names (suppressed by -p) [1]\n"); | |
| 351 fprintf(stderr, " -S, --skip-lines INT skip first INT lines [0]\n"); | |
| 352 fprintf(stderr, "\n"); | |
| 353 fprintf(stderr, "Querying and other options:\n"); | |
| 354 fprintf(stderr, " -h, --print-header print also the header lines\n"); | |
| 355 fprintf(stderr, " -H, --only-header print only the header lines\n"); | |
| 356 fprintf(stderr, " -l, --list-chroms list chromosome names\n"); | |
| 357 fprintf(stderr, " -r, --reheader FILE replace the header with the content of FILE\n"); | |
| 358 fprintf(stderr, " -R, --regions FILE restrict to regions listed in the file\n"); | |
| 359 fprintf(stderr, " -T, --targets FILE similar to -R but streams rather than index-jumps\n"); | |
| 360 fprintf(stderr, "\n"); | |
| 361 return 1; | |
| 362 } | |
| 363 | |
| 364 int main(int argc, char *argv[]) | |
| 365 { | |
| 366 int c, min_shift = 0, is_force = 0, list_chroms = 0, do_csi = 0; | |
| 367 tbx_conf_t conf = tbx_conf_gff, *conf_ptr = NULL; | |
| 368 char *reheader = NULL; | |
| 369 args_t args; | |
| 370 memset(&args,0,sizeof(args_t)); | |
| 371 | |
| 372 static struct option loptions[] = | |
| 373 { | |
| 374 {"help",0,0,'h'}, | |
| 375 {"regions",1,0,'R'}, | |
| 376 {"targets",1,0,'T'}, | |
| 377 {"csi",0,0,'C'}, | |
| 378 {"zero-based",0,0,'0'}, | |
| 379 {"print-header",0,0,'h'}, | |
| 380 {"only-header",0,0,'H'}, | |
| 381 {"begin",1,0,'b'}, | |
| 382 {"comment",1,0,'c'}, | |
| 383 {"end",1,0,'e'}, | |
| 384 {"force",0,0,'f'}, | |
| 385 {"preset",1,0,'p'}, | |
| 386 {"sequence",1,0,'s'}, | |
| 387 {"skip-lines",1,0,'S'}, | |
| 388 {"list-chroms",0,0,'l'}, | |
| 389 {"reheader",1,0,'r'}, | |
| 390 {0,0,0,0} | |
| 391 }; | |
| 392 | |
| 393 while ((c = getopt_long(argc, argv, "hH?0b:c:e:fm:p:s:S:lr:CR:T:", loptions,NULL)) >= 0) | |
| 394 { | |
| 395 switch (c) | |
| 396 { | |
| 397 case 'R': args.regions_fname = optarg; break; | |
| 398 case 'T': args.targets_fname = optarg; break; | |
| 399 case 'C': do_csi = 1; break; | |
| 400 case 'r': reheader = optarg; break; | |
| 401 case 'h': args.print_header = 1; break; | |
| 402 case 'H': args.header_only = 1; break; | |
| 403 case 'l': list_chroms = 1; break; | |
| 404 case '0': conf.preset |= TBX_UCSC; break; | |
| 405 case 'b': conf.bc = atoi(optarg); break; | |
| 406 case 'e': conf.ec = atoi(optarg); break; | |
| 407 case 'c': conf.meta_char = *optarg; break; | |
| 408 case 'f': is_force = 1; break; | |
| 409 case 'm': min_shift = atoi(optarg); break; | |
| 410 case 'p': | |
| 411 if (strcmp(optarg, "gff") == 0) conf_ptr = &tbx_conf_gff; | |
| 412 else if (strcmp(optarg, "bed") == 0) conf_ptr = &tbx_conf_bed; | |
| 413 else if (strcmp(optarg, "sam") == 0) conf_ptr = &tbx_conf_sam; | |
| 414 else if (strcmp(optarg, "vcf") == 0) conf_ptr = &tbx_conf_vcf; | |
| 415 else if (strcmp(optarg, "bcf") == 0) ; // bcf is autodetected, preset is not needed | |
| 416 else if (strcmp(optarg, "bam") == 0) ; // same as bcf | |
| 417 else error("The preset string not recognised: '%s'\n", optarg); | |
| 418 break; | |
| 419 case 's': conf.sc = atoi(optarg); break; | |
| 420 case 'S': conf.line_skip = atoi(optarg); break; | |
| 421 default: return usage(); | |
| 422 } | |
| 423 } | |
| 424 | |
| 425 if ( optind==argc ) return usage(); | |
| 426 | |
| 427 if ( list_chroms ) | |
| 428 return query_chroms(argv[optind]); | |
| 429 | |
| 430 if ( argc > optind+1 || args.header_only || args.regions_fname || args.targets_fname ) | |
| 431 { | |
| 432 int nregs = 0; | |
| 433 char **regs = NULL; | |
| 434 if ( !args.header_only ) | |
| 435 regs = parse_regions(args.regions_fname, argv+optind+1, argc-optind-1, &nregs); | |
| 436 return query_regions(&args, argv[optind], regs, nregs); | |
| 437 } | |
| 438 | |
| 439 char *fname = argv[optind]; | |
| 440 int ftype = file_type(fname); | |
| 441 if ( !conf_ptr ) // no preset given | |
| 442 { | |
| 443 if ( ftype==IS_GFF ) conf_ptr = &tbx_conf_gff; | |
| 444 else if ( ftype==IS_BED ) conf_ptr = &tbx_conf_bed; | |
| 445 else if ( ftype==IS_SAM ) conf_ptr = &tbx_conf_sam; | |
| 446 else if ( ftype==IS_VCF ) | |
| 447 { | |
| 448 conf_ptr = &tbx_conf_vcf; | |
| 449 if ( !min_shift && do_csi ) min_shift = 14; | |
| 450 } | |
| 451 else if ( ftype==IS_BCF ) | |
| 452 { | |
| 453 if ( !min_shift ) min_shift = 14; | |
| 454 } | |
| 455 else if ( ftype==IS_BAM ) | |
| 456 { | |
| 457 if ( !min_shift ) min_shift = 14; | |
| 458 } | |
| 459 } | |
| 460 if ( do_csi ) | |
| 461 { | |
| 462 if ( !min_shift ) min_shift = 14; | |
| 463 min_shift *= do_csi; // positive for CSIv2, negative for CSIv1 | |
| 464 } | |
| 465 if ( min_shift!=0 && !do_csi ) do_csi = 1; | |
| 466 | |
| 467 if ( reheader ) | |
| 468 return reheader_file(fname, reheader, ftype, conf_ptr); | |
| 469 | |
| 470 if ( conf_ptr ) | |
| 471 conf = *conf_ptr; | |
| 472 | |
| 473 char *suffix = ".tbi"; | |
| 474 if ( do_csi ) suffix = ".csi"; | |
| 475 else if ( ftype==IS_BAM ) suffix = ".bai"; | |
| 476 else if ( ftype==IS_CRAM ) suffix = ".crai"; | |
| 477 | |
| 478 char *idx_fname = calloc(strlen(fname) + 5, 1); | |
| 479 strcat(strcpy(idx_fname, fname), suffix); | |
| 480 | |
| 481 struct stat stat_tbi, stat_file; | |
| 482 if ( !is_force && stat(idx_fname, &stat_tbi)==0 ) | |
| 483 { | |
| 484 // Before complaining about existing index, check if the VCF file isn't | |
| 485 // newer. This is a common source of errors, people tend not to notice | |
| 486 // that tabix failed | |
| 487 stat(fname, &stat_file); | |
| 488 if ( stat_file.st_mtime <= stat_tbi.st_mtime ) | |
| 489 error("[tabix] the index file exists. Please use '-f' to overwrite.\n"); | |
| 490 } | |
| 491 free(idx_fname); | |
| 492 | |
| 493 if ( ftype==IS_CRAM ) | |
| 494 { | |
| 495 if ( bam_index_build(fname, min_shift)!=0 ) error("bam_index_build failed: %s\n", fname); | |
| 496 return 0; | |
| 497 } | |
| 498 else if ( do_csi ) | |
| 499 { | |
| 500 if ( ftype==IS_BCF ) | |
| 501 { | |
| 502 if ( bcf_index_build(fname, min_shift)!=0 ) error("bcf_index_build failed: %s\n", fname); | |
| 503 return 0; | |
| 504 } | |
| 505 if ( ftype==IS_BAM ) | |
| 506 { | |
| 507 if ( bam_index_build(fname, min_shift)!=0 ) error("bam_index_build failed: %s\n", fname); | |
| 508 return 0; | |
| 509 } | |
| 510 if ( tbx_index_build(fname, min_shift, &conf)!=0 ) error("tbx_index_build failed: %s\n", fname); | |
| 511 return 0; | |
| 512 } | |
| 513 else // TBI index | |
| 514 { | |
| 515 if ( tbx_index_build(fname, min_shift, &conf) ) error("tbx_index_build failed: %s\n", fname); | |
| 516 return 0; | |
| 517 } | |
| 518 return 0; | |
| 519 } |
