Mercurial > repos > ashvark > qiime_1_8_0
comparison bwa-0.6.2/bwtindex.c @ 2:a294fbfcb1db draft default tip
Uploaded BWA
author | ashvark |
---|---|
date | Fri, 18 Jul 2014 07:55:59 -0400 |
parents | dd1186b11b3b |
children |
comparison
equal
deleted
inserted
replaced
1:a9636dc1e99a | 2:a294fbfcb1db |
---|---|
1 /* The MIT License | |
2 | |
3 Copyright (c) 2008 Genome Research Ltd (GRL). | |
4 | |
5 Permission is hereby granted, free of charge, to any person obtaining | |
6 a copy of this software and associated documentation files (the | |
7 "Software"), to deal in the Software without restriction, including | |
8 without limitation the rights to use, copy, modify, merge, publish, | |
9 distribute, sublicense, and/or sell copies of the Software, and to | |
10 permit persons to whom the Software is furnished to do so, subject to | |
11 the following conditions: | |
12 | |
13 The above copyright notice and this permission notice shall be | |
14 included in all copies or substantial portions of the Software. | |
15 | |
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
19 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | |
20 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | |
21 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | |
22 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
23 SOFTWARE. | |
24 */ | |
25 | |
26 /* Contact: Heng Li <lh3@sanger.ac.uk> */ | |
27 | |
28 #include <stdio.h> | |
29 #include <stdlib.h> | |
30 #include <string.h> | |
31 #include <unistd.h> | |
32 #include <time.h> | |
33 #include <zlib.h> | |
34 #include "bntseq.h" | |
35 #include "bwt.h" | |
36 #include "main.h" | |
37 #include "utils.h" | |
38 | |
39 bwt_t *bwt_pac2bwt(const char *fn_pac, int use_is); | |
40 void bwa_pac_rev_core(const char *fn, const char *fn_rev); | |
41 | |
42 int bwa_index(int argc, char *argv[]) | |
43 { | |
44 char *prefix = 0, *str, *str2, *str3; | |
45 int c, algo_type = 0, is_color = 0, is_64 = 0; | |
46 clock_t t; | |
47 int64_t l_pac; | |
48 | |
49 while ((c = getopt(argc, argv, "6ca:p:")) >= 0) { | |
50 switch (c) { | |
51 case 'a': // if -a is not set, algo_type will be determined later | |
52 if (strcmp(optarg, "div") == 0) algo_type = 1; | |
53 else if (strcmp(optarg, "bwtsw") == 0) algo_type = 2; | |
54 else if (strcmp(optarg, "is") == 0) algo_type = 3; | |
55 else err_fatal(__func__, "unknown algorithm: '%s'.", optarg); | |
56 break; | |
57 case 'p': prefix = strdup(optarg); break; | |
58 case 'c': is_color = 1; break; | |
59 case '6': is_64 = 1; break; | |
60 default: return 1; | |
61 } | |
62 } | |
63 | |
64 if (optind + 1 > argc) { | |
65 fprintf(stderr, "\n"); | |
66 fprintf(stderr, "Usage: bwa index [-a bwtsw|is] [-c] <in.fasta>\n\n"); | |
67 fprintf(stderr, "Options: -a STR BWT construction algorithm: bwtsw or is [auto]\n"); | |
68 fprintf(stderr, " -p STR prefix of the index [same as fasta name]\n"); | |
69 fprintf(stderr, " -6 index files named as <in.fasta>.64.* instead of <in.fasta>.* \n"); | |
70 // fprintf(stderr, " -c build color-space index\n"); | |
71 fprintf(stderr, "\n"); | |
72 fprintf(stderr, "Warning: `-a bwtsw' does not work for short genomes, while `-a is' and\n"); | |
73 fprintf(stderr, " `-a div' do not work not for long genomes. Please choose `-a'\n"); | |
74 fprintf(stderr, " according to the length of the genome.\n\n"); | |
75 return 1; | |
76 } | |
77 if (prefix == 0) { | |
78 prefix = malloc(strlen(argv[optind]) + 4); | |
79 strcpy(prefix, argv[optind]); | |
80 if (is_64) strcat(prefix, ".64"); | |
81 } | |
82 str = (char*)calloc(strlen(prefix) + 10, 1); | |
83 str2 = (char*)calloc(strlen(prefix) + 10, 1); | |
84 str3 = (char*)calloc(strlen(prefix) + 10, 1); | |
85 | |
86 if (is_color == 0) { // nucleotide indexing | |
87 gzFile fp = xzopen(argv[optind], "r"); | |
88 t = clock(); | |
89 fprintf(stderr, "[bwa_index] Pack FASTA... "); | |
90 l_pac = bns_fasta2bntseq(fp, prefix, 0); | |
91 fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); | |
92 gzclose(fp); | |
93 } else { // color indexing | |
94 gzFile fp = xzopen(argv[optind], "r"); | |
95 strcat(strcpy(str, prefix), ".nt"); | |
96 t = clock(); | |
97 fprintf(stderr, "[bwa_index] Pack nucleotide FASTA... "); | |
98 l_pac = bns_fasta2bntseq(fp, str, 0); | |
99 fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); | |
100 gzclose(fp); | |
101 { | |
102 char *tmp_argv[3]; | |
103 tmp_argv[0] = argv[0]; tmp_argv[1] = str; tmp_argv[2] = prefix; | |
104 t = clock(); | |
105 fprintf(stderr, "[bwa_index] Convert nucleotide PAC to color PAC... "); | |
106 bwa_pac2cspac(3, tmp_argv); | |
107 fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); | |
108 } | |
109 } | |
110 if (algo_type == 0) algo_type = l_pac > 50000000? 2 : 3; // set the algorithm for generating BWT | |
111 { | |
112 strcpy(str, prefix); strcat(str, ".pac"); | |
113 strcpy(str2, prefix); strcat(str2, ".bwt"); | |
114 t = clock(); | |
115 fprintf(stderr, "[bwa_index] Construct BWT for the packed sequence...\n"); | |
116 if (algo_type == 2) bwt_bwtgen(str, str2); | |
117 else if (algo_type == 1 || algo_type == 3) { | |
118 bwt_t *bwt; | |
119 bwt = bwt_pac2bwt(str, algo_type == 3); | |
120 bwt_dump_bwt(str2, bwt); | |
121 bwt_destroy(bwt); | |
122 } | |
123 fprintf(stderr, "[bwa_index] %.2f seconds elapse.\n", (float)(clock() - t) / CLOCKS_PER_SEC); | |
124 } | |
125 { | |
126 bwt_t *bwt; | |
127 strcpy(str, prefix); strcat(str, ".bwt"); | |
128 t = clock(); | |
129 fprintf(stderr, "[bwa_index] Update BWT... "); | |
130 bwt = bwt_restore_bwt(str); | |
131 bwt_bwtupdate_core(bwt); | |
132 bwt_dump_bwt(str, bwt); | |
133 bwt_destroy(bwt); | |
134 fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); | |
135 } | |
136 { | |
137 gzFile fp = xzopen(argv[optind], "r"); | |
138 t = clock(); | |
139 fprintf(stderr, "[bwa_index] Pack forward-only FASTA... "); | |
140 l_pac = bns_fasta2bntseq(fp, prefix, 1); | |
141 fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); | |
142 gzclose(fp); | |
143 } | |
144 { | |
145 bwt_t *bwt; | |
146 strcpy(str, prefix); strcat(str, ".bwt"); | |
147 strcpy(str3, prefix); strcat(str3, ".sa"); | |
148 t = clock(); | |
149 fprintf(stderr, "[bwa_index] Construct SA from BWT and Occ... "); | |
150 bwt = bwt_restore_bwt(str); | |
151 bwt_cal_sa(bwt, 32); | |
152 bwt_dump_sa(str3, bwt); | |
153 bwt_destroy(bwt); | |
154 fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); | |
155 } | |
156 free(str3); free(str2); free(str); free(prefix); | |
157 return 0; | |
158 } |