annotate bwa-0.6.2/bwtsw2_main.c @ 2:a294fbfcb1db draft default tip

Uploaded BWA
author ashvark
date Fri, 18 Jul 2014 07:55:59 -0400
parents dd1186b11b3b
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1 #include <unistd.h>
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
2 #include <stdlib.h>
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
3 #include <string.h>
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
4 #include <stdio.h>
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
5 #include <math.h>
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
6 #include "bwt.h"
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
7 #include "bwtsw2.h"
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
8 #include "utils.h"
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
9
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
10 int bwa_bwtsw2(int argc, char *argv[])
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
11 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
12 extern char *bwa_infer_prefix(const char *hint);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
13 bsw2opt_t *opt;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
14 bwt_t *target;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
15 char buf[1024], *prefix;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
16 bntseq_t *bns;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
17 int c;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
18
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
19 opt = bsw2_init_opt();
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
20 srand48(11);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
21 while ((c = getopt(argc, argv, "q:r:a:b:t:T:w:d:z:m:s:c:N:Hf:MI:S")) >= 0) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
22 switch (c) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
23 case 'q': opt->q = atoi(optarg); break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
24 case 'r': opt->r = atoi(optarg); break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
25 case 'a': opt->a = atoi(optarg); break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
26 case 'b': opt->b = atoi(optarg); break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
27 case 'w': opt->bw = atoi(optarg); break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
28 case 'T': opt->t = atoi(optarg); break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
29 case 't': opt->n_threads = atoi(optarg); break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
30 case 'z': opt->z = atoi(optarg); break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
31 case 's': opt->is = atoi(optarg); break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
32 case 'm': opt->mask_level = atof(optarg); break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
33 case 'c': opt->coef = atof(optarg); break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
34 case 'N': opt->t_seeds = atoi(optarg); break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
35 case 'M': opt->multi_2nd = 1; break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
36 case 'H': opt->hard_clip = 1; break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
37 case 'f': xreopen(optarg, "w", stdout); break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
38 case 'I': opt->max_ins = atoi(optarg); break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
39 case 'S': opt->skip_sw = 1; break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
40 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
41 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
42 opt->qr = opt->q + opt->r;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
43
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
44 if (optind + 2 > argc) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
45 fprintf(stderr, "\n");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
46 fprintf(stderr, "Usage: bwa bwasw [options] <target.prefix> <query.fa> [query2.fa]\n\n");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
47 fprintf(stderr, "Options: -a INT score for a match [%d]\n", opt->a);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
48 fprintf(stderr, " -b INT mismatch penalty [%d]\n", opt->b);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
49 fprintf(stderr, " -q INT gap open penalty [%d]\n", opt->q);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
50 fprintf(stderr, " -r INT gap extension penalty [%d]\n", opt->r);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
51 fprintf(stderr, " -w INT band width [%d]\n", opt->bw);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
52 fprintf(stderr, " -m FLOAT mask level [%.2f]\n", opt->mask_level);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
53 fprintf(stderr, "\n");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
54 fprintf(stderr, " -t INT number of threads [%d]\n", opt->n_threads);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
55 fprintf(stderr, " -f FILE file to output results to instead of stdout\n");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
56 fprintf(stderr, " -H in SAM output, use hard clipping instead of soft clipping\n");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
57 fprintf(stderr, " -M mark multi-part alignments as secondary\n");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
58 fprintf(stderr, " -S skip Smith-Waterman read pairing\n");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
59 fprintf(stderr, " -I INT ignore pairs with insert >=INT for inferring the size distr [%d]\n", opt->max_ins);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
60 fprintf(stderr, "\n");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
61 fprintf(stderr, " -T INT score threshold divided by a [%d]\n", opt->t);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
62 fprintf(stderr, " -c FLOAT coefficient of length-threshold adjustment [%.1f]\n", opt->coef);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
63 fprintf(stderr, " -z INT Z-best [%d]\n", opt->z);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
64 fprintf(stderr, " -s INT maximum seeding interval size [%d]\n", opt->is);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
65 fprintf(stderr, " -N INT # seeds to trigger reverse alignment [%d]\n", opt->t_seeds);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
66 fprintf(stderr, "\n");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
67 fprintf(stderr, "Note: For long Illumina, 454 and Sanger reads, assembly contigs, fosmids and\n");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
68 fprintf(stderr, " BACs, the default setting usually works well. For the current PacBio\n");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
69 fprintf(stderr, " reads (end of 2010), '-b5 -q2 -r1 -z10' is recommended. One may also\n");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
70 fprintf(stderr, " increase '-z' for better sensitivity.\n");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
71 fprintf(stderr, "\n");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
72
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
73 return 1;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
74 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
75
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
76 // adjust opt for opt->a
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
77 opt->t *= opt->a;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
78 opt->coef *= opt->a;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
79
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
80 if ((prefix = bwa_infer_prefix(argv[optind])) == 0) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
81 fprintf(stderr, "[%s] fail to locate the index\n", __func__);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
82 return 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
83 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
84 strcpy(buf, prefix); target = bwt_restore_bwt(strcat(buf, ".bwt"));
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
85 strcpy(buf, prefix); bwt_restore_sa(strcat(buf, ".sa"), target);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
86 bns = bns_restore(prefix);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
87
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
88 bsw2_aln(opt, bns, target, argv[optind+1], optind+2 < argc? argv[optind+2] : 0);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
89
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
90 bns_destroy(bns);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
91 bwt_destroy(target);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
92 free(opt); free(prefix);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
93
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
94 return 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
95 }