annotate ezBAMQC/src/htslib/test/test-vcf-api.c @ 18:494b5cd02238

bash script
author youngkim
date Wed, 30 Mar 2016 13:39:05 -0400
parents dfa3745e5fd8
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1 /* test/test-vcf-api.c -- VCF test harness.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3 Copyright (C) 2013, 2014 Genome Research Ltd.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
5 Author: Petr Danecek <pd3@sanger.ac.uk>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
6
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
7 Permission is hereby granted, free of charge, to any person obtaining a copy
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
8 of this software and associated documentation files (the "Software"), to deal
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
9 in the Software without restriction, including without limitation the rights
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
11 copies of the Software, and to permit persons to whom the Software is
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
12 furnished to do so, subject to the following conditions:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
13
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
14 The above copyright notice and this permission notice shall be included in
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
15 all copies or substantial portions of the Software.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
16
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
20 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
22 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
23 DEALINGS IN THE SOFTWARE. */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
24
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
25 #include <stdio.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
26 #include <htslib/hts.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
27 #include <htslib/vcf.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
28 #include <htslib/kstring.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
29 #include <htslib/kseq.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
30
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
31 void write_bcf(char *fname)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
32 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
33 // Init
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
34 htsFile *fp = hts_open(fname,"wb");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
35 bcf_hdr_t *hdr = bcf_hdr_init("w");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
36 bcf1_t *rec = bcf_init1();
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
37
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
38 // Create VCF header
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
39 kstring_t str = {0,0,0};
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
40 bcf_hdr_append(hdr, "##fileDate=20090805");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
41 bcf_hdr_append(hdr, "##FORMAT=<ID=UF,Number=1,Type=Integer,Description=\"Unused FORMAT\">");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
42 bcf_hdr_append(hdr, "##INFO=<ID=UI,Number=1,Type=Integer,Description=\"Unused INFO\">");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
43 bcf_hdr_append(hdr, "##FILTER=<ID=Flt,Description=\"Unused FILTER\">");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
44 bcf_hdr_append(hdr, "##unused=<XX=AA,Description=\"Unused generic\">");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
45 bcf_hdr_append(hdr, "##unused=unformatted text 1");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
46 bcf_hdr_append(hdr, "##unused=unformatted text 2");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
47 bcf_hdr_append(hdr, "##contig=<ID=Unused,length=62435964>");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
48 bcf_hdr_append(hdr, "##source=myImputationProgramV3.1");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
49 bcf_hdr_append(hdr, "##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
50 bcf_hdr_append(hdr, "##contig=<ID=20,length=62435964,assembly=B36,md5=f126cdf8a6e0c7f379d618ff66beb2da,species=\"Homo sapiens\",taxonomy=x>");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
51 bcf_hdr_append(hdr, "##phasing=partial");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
52 bcf_hdr_append(hdr, "##INFO=<ID=NS,Number=1,Type=Integer,Description=\"Number of Samples With Data\">");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
53 bcf_hdr_append(hdr, "##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
54 bcf_hdr_append(hdr, "##INFO=<ID=AF,Number=A,Type=Float,Description=\"Allele Frequency\">");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
55 bcf_hdr_append(hdr, "##INFO=<ID=AA,Number=1,Type=String,Description=\"Ancestral Allele\">");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
56 bcf_hdr_append(hdr, "##INFO=<ID=DB,Number=0,Type=Flag,Description=\"dbSNP membership, build 129\">");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
57 bcf_hdr_append(hdr, "##INFO=<ID=H2,Number=0,Type=Flag,Description=\"HapMap2 membership\">");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
58 bcf_hdr_append(hdr, "##FILTER=<ID=q10,Description=\"Quality below 10\">");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
59 bcf_hdr_append(hdr, "##FILTER=<ID=s50,Description=\"Less than 50% of samples have data\">");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
60 bcf_hdr_append(hdr, "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
61 bcf_hdr_append(hdr, "##FORMAT=<ID=GQ,Number=1,Type=Integer,Description=\"Genotype Quality\">");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
62 bcf_hdr_append(hdr, "##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"Read Depth\">");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
63 bcf_hdr_append(hdr, "##FORMAT=<ID=HQ,Number=2,Type=Integer,Description=\"Haplotype Quality\">");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
64 bcf_hdr_append(hdr, "##FORMAT=<ID=TS,Number=1,Type=String,Description=\"Test String\">");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
65
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
66 bcf_hdr_add_sample(hdr, "NA00001");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
67 bcf_hdr_add_sample(hdr, "NA00002");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
68 bcf_hdr_add_sample(hdr, "NA00003");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
69 bcf_hdr_add_sample(hdr, NULL); // to update internal structures
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
70 bcf_hdr_write(fp, hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
71
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
72
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
73 // Add a record
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
74 // 20 14370 rs6054257 G A 29 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
75 // .. CHROM
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
76 rec->rid = bcf_hdr_name2id(hdr, "20");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
77 // .. POS
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
78 rec->pos = 14369;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
79 // .. ID
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
80 bcf_update_id(hdr, rec, "rs6054257");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
81 // .. REF and ALT
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
82 bcf_update_alleles_str(hdr, rec, "G,A");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
83 // .. QUAL
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
84 rec->qual = 29;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
85 // .. FILTER
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
86 int32_t tmpi = bcf_hdr_id2int(hdr, BCF_DT_ID, "PASS");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
87 bcf_update_filter(hdr, rec, &tmpi, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
88 // .. INFO
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
89 tmpi = 3;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
90 bcf_update_info_int32(hdr, rec, "NS", &tmpi, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
91 tmpi = 14;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
92 bcf_update_info_int32(hdr, rec, "DP", &tmpi, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
93 float tmpf = 0.5;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
94 bcf_update_info_float(hdr, rec, "AF", &tmpf, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
95 bcf_update_info_flag(hdr, rec, "DB", NULL, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
96 bcf_update_info_flag(hdr, rec, "H2", NULL, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
97 // .. FORMAT
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
98 int32_t *tmpia = (int*)malloc(bcf_hdr_nsamples(hdr)*2*sizeof(int));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
99 tmpia[0] = bcf_gt_phased(0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
100 tmpia[1] = bcf_gt_phased(0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
101 tmpia[2] = bcf_gt_phased(1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
102 tmpia[3] = bcf_gt_phased(0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
103 tmpia[4] = bcf_gt_unphased(1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
104 tmpia[5] = bcf_gt_unphased(1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
105 bcf_update_genotypes(hdr, rec, tmpia, bcf_hdr_nsamples(hdr)*2);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
106 tmpia[0] = 48;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
107 tmpia[1] = 48;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
108 tmpia[2] = 43;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
109 bcf_update_format_int32(hdr, rec, "GQ", tmpia, bcf_hdr_nsamples(hdr));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
110 tmpia[0] = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
111 tmpia[1] = 8;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
112 tmpia[2] = 5;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
113 bcf_update_format_int32(hdr, rec, "DP", tmpia, bcf_hdr_nsamples(hdr));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
114 tmpia[0] = 51;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
115 tmpia[1] = 51;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
116 tmpia[2] = 51;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
117 tmpia[3] = 51;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
118 tmpia[4] = bcf_int32_missing;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
119 tmpia[5] = bcf_int32_missing;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
120 bcf_update_format_int32(hdr, rec, "HQ", tmpia, bcf_hdr_nsamples(hdr)*2);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
121 char *tmp_str[] = {"String1","SomeOtherString2","YetAnotherString3"};
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
122 bcf_update_format_string(hdr, rec, "TS", (const char**)tmp_str, 3);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
123 bcf_write1(fp, hdr, rec);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
124
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
125 // 20 1110696 . A G,T 67 . NS=2;DP=10;AF=0.333,.;AA=T;DB GT 2 1 ./.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
126 bcf_clear1(rec);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
127 rec->rid = bcf_hdr_name2id(hdr, "20");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
128 rec->pos = 1110695;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
129 bcf_update_alleles_str(hdr, rec, "A,G,T");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
130 rec->qual = 67;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
131 tmpi = 2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
132 bcf_update_info_int32(hdr, rec, "NS", &tmpi, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
133 tmpi = 10;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
134 bcf_update_info_int32(hdr, rec, "DP", &tmpi, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
135 float *tmpfa = (float*)malloc(2*sizeof(float));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
136 tmpfa[0] = 0.333;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
137 bcf_float_set_missing(tmpfa[1]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
138 bcf_update_info_float(hdr, rec, "AF", tmpfa, 2);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
139 bcf_update_info_string(hdr, rec, "AA", "T");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
140 bcf_update_info_flag(hdr, rec, "DB", NULL, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
141 tmpia[0] = bcf_gt_phased(2);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
142 tmpia[1] = bcf_int32_vector_end;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
143 tmpia[2] = bcf_gt_phased(1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
144 tmpia[3] = bcf_int32_vector_end;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
145 tmpia[4] = bcf_gt_missing;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
146 tmpia[5] = bcf_gt_missing;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
147 bcf_update_genotypes(hdr, rec, tmpia, bcf_hdr_nsamples(hdr)*2);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
148 bcf_write1(fp, hdr, rec);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
149
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
150 free(tmpia);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
151 free(tmpfa);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
152
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
153 // Clean
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
154 free(str.s);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
155 bcf_destroy1(rec);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
156 bcf_hdr_destroy(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
157 int ret;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
158 if ( (ret=hts_close(fp)) )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
159 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
160 fprintf(stderr,"hts_close(%s): non-zero status %d\n",fname,ret);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
161 exit(ret);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
162 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
163 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
164
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
165 void bcf_to_vcf(char *fname)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
166 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
167 htsFile *fp = hts_open(fname,"rb");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
168 bcf_hdr_t *hdr = bcf_hdr_read(fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
169 bcf1_t *rec = bcf_init1();
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
170
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
171 char *gz_fname = (char*) malloc(strlen(fname)+4);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
172 snprintf(gz_fname,strlen(fname)+4,"%s.gz",fname);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
173 htsFile *out = hts_open(gz_fname,"wg");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
174
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
175 bcf_hdr_t *hdr_out = bcf_hdr_dup(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
176 bcf_hdr_remove(hdr_out,BCF_HL_STR,"unused");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
177 bcf_hdr_remove(hdr_out,BCF_HL_GEN,"unused");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
178 bcf_hdr_remove(hdr_out,BCF_HL_FLT,"Flt");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
179 bcf_hdr_remove(hdr_out,BCF_HL_INFO,"UI");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
180 bcf_hdr_remove(hdr_out,BCF_HL_FMT,"UF");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
181 bcf_hdr_remove(hdr_out,BCF_HL_CTG,"Unused");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
182 bcf_hdr_write(out, hdr_out);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
183
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
184 while ( bcf_read1(fp, hdr, rec)>=0 )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
185 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
186 bcf_write1(out, hdr_out, rec);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
187
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
188 // Test problems caused by bcf1_sync: the data block
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
189 // may be realloced, also the unpacked structures must
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
190 // get updated.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
191 bcf_unpack(rec, BCF_UN_STR);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
192 bcf_update_id(hdr, rec, 0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
193 bcf_update_format_int32(hdr, rec, "GQ", NULL, 0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
194
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
195 bcf1_t *dup = bcf_dup(rec); // force bcf1_sync call
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
196 bcf_write1(out, hdr_out, dup);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
197 bcf_destroy1(dup);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
198
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
199 bcf_update_alleles_str(hdr_out, rec, "G,A");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
200 int32_t tmpi = 99;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
201 bcf_update_info_int32(hdr_out, rec, "DP", &tmpi, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
202 int32_t tmpia[] = {9,9,9};
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
203 bcf_update_format_int32(hdr_out, rec, "DP", tmpia, 3);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
204
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
205 bcf_write1(out, hdr_out, rec);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
206 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
207
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
208 bcf_destroy1(rec);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
209 bcf_hdr_destroy(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
210 bcf_hdr_destroy(hdr_out);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
211 int ret;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
212 if ( (ret=hts_close(fp)) )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
213 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
214 fprintf(stderr,"hts_close(%s): non-zero status %d\n",fname,ret);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
215 exit(ret);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
216 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
217 if ( (ret=hts_close(out)) )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
218 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
219 fprintf(stderr,"hts_close(%s): non-zero status %d\n",gz_fname,ret);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
220 exit(ret);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
221 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
222
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
223
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
224 // read gzip, write stdout
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
225 htsFile *gz_in = hts_open(gz_fname, "r");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
226 if ( !gz_in )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
227 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
228 fprintf(stderr,"Could not read: %s\n", gz_fname);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
229 exit(1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
230 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
231
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
232 kstring_t line = {0,0,0};
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
233 while ( hts_getline(gz_in, KS_SEP_LINE, &line)>0 )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
234 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
235 kputc('\n',&line);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
236 fwrite(line.s,1,line.l,stdout);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
237 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
238
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
239 if ( (ret=hts_close(gz_in)) )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
240 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
241 fprintf(stderr,"hts_close(%s): non-zero status %d\n",gz_fname,ret);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
242 exit(ret);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
243 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
244 free(line.s);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
245 free(gz_fname);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
246 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
247
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
248 void iterator(const char *fname)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
249 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
250 htsFile *fp = hts_open(fname, "r");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
251 bcf_hdr_t *hdr = bcf_hdr_read(fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
252 hts_idx_t *idx;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
253 hts_itr_t *iter;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
254
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
255 bcf_index_build(fname, 0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
256 idx = bcf_index_load(fname);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
257
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
258 iter = bcf_itr_queryi(idx, bcf_hdr_name2id(hdr, "20"), 1110600, 1110800);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
259 bcf_itr_destroy(iter);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
260
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
261 iter = bcf_itr_querys(idx, hdr, "20:1110600-1110800");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
262 bcf_itr_destroy(iter);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
263
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
264 hts_idx_destroy(idx);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
265 bcf_hdr_destroy(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
266 int ret;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
267 if ( (ret=hts_close(fp)) )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
268 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
269 fprintf(stderr,"hts_close(%s): non-zero status %d\n",fname,ret);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
270 exit(ret);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
271 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
272 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
273
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
274 int main(int argc, char **argv)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
275 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
276 char *fname = argc>1 ? argv[1] : "rmme.bcf";
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
277 write_bcf(fname);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
278 bcf_to_vcf(fname);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
279 iterator(fname);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
280 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
281 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
282