0
|
1 /* leehd reads and displays the hash table from disk
|
|
2 Syntax: leehd prefixNameOUT
|
|
3
|
|
4 prefixNameOUT.d2hW : index of words-Pos-Occurrences
|
|
5 prefixNameOUT.d2hP : positions
|
|
6 both must be available
|
|
7
|
|
8 Any single extra argument after prefixNameOUT means "Verbose mode"
|
|
9 Feb.2012: computes word frequencies
|
|
10
|
|
11 ortrelles@uma.es / Dic.2011
|
|
12 ---------------------------------------------------------*/
|
|
13
|
|
14 #include <stdio.h>
|
|
15 #include <stdlib.h>
|
|
16 #include <string.h>
|
|
17 #include <stdlib.h>
|
|
18 #include <errno.h>
|
|
19 #include <inttypes.h>
|
|
20
|
|
21 #include "structs.h"
|
|
22 #include "commonFunctions.h"
|
|
23 #include "dictionaryFunctions.h"
|
|
24
|
|
25 #define PEQ 1001
|
|
26
|
|
27 int main(int ac, char** av){
|
|
28
|
|
29 char fname[1024], *W;
|
|
30 W=(char *)malloc(33*sizeof(char));
|
|
31 FILE *f1, *f2, *f3;
|
|
32 hashentry he;
|
|
33 uint64_t i=0;
|
|
34 location spos;
|
|
35 uint64_t nW=0,maxF=0, aveF=0;
|
|
36 int flagV=0;
|
|
37 int64_t freq[PEQ];
|
|
38
|
|
39 if(ac<2)terror("USE: leehd prefixNameOUT [v=verbose]\n");
|
|
40 if (ac==3) flagV=1;
|
|
41 for (i=0;i<PEQ;i++) freq[i]=0;
|
|
42 sprintf(fname,"%s.d2hW",av[1]); // Words file (first level of hash table)
|
|
43 if ((f1 = fopen(fname,"rb"))==NULL) terror("opening prefix.h2dW file");
|
|
44 sprintf(fname,"%s.d2hP",av[1]); // Positions file
|
|
45 if ((f2 = fopen(fname,"rb"))==NULL) terror("opening prefix.h2dP file");
|
|
46
|
|
47 sprintf(fname,"%s.freq",av[1]); // output
|
|
48 if ((f3 = fopen(fname,"wt"))==NULL) terror("opening prefix.freq OUT file");
|
|
49
|
|
50 // kick-off
|
|
51 if(fread(&he,sizeof(hashentry),1,f1)!=1)
|
|
52 terror("Empty dictionary");
|
|
53
|
|
54 while(!feof(f1)){
|
|
55
|
|
56 if (flagV) {showWord(&he.w, W);fprintf(stdout, "%.32s", W);}
|
|
57 if (flagV) fprintf(stdout," : num=%-7" PRIu64 ":",he.num);
|
|
58 if (he.num>=PEQ) {
|
|
59 fprintf(f3, "%" PRIu64 "\t", he.num);
|
|
60 showWord(&he.w, W);
|
|
61 fprintf(f3, "%.32s", W);
|
|
62 fprintf(f3, "%" PRIu64 "\n", he.num);
|
|
63 }
|
|
64 else freq[he.num]++;
|
|
65 nW++;
|
|
66 if (he.num>maxF) maxF=he.num;
|
|
67 aveF+=he.num;
|
|
68
|
|
69 fseek(f2,0, he.pos);
|
|
70 if (flagV) {
|
|
71
|
|
72 for (i=0;i<he.num;i++){
|
|
73 if(fread(&spos,sizeof(location),1,f2)!=1)
|
|
74 terror("Error reading the word occurrences");
|
|
75 fprintf(stdout,"(%" PRIu64 ",%" PRIu64 ") ",spos.pos,spos.seq);
|
|
76 }
|
|
77 fprintf(stdout,"\n");
|
|
78 }
|
|
79 if(fread(&he,sizeof(hashentry),1,f1)!=1)
|
|
80 if(ferror(f1))
|
|
81 terror("Error reading a dictionary entry");
|
|
82 }
|
|
83 free(W);
|
|
84
|
|
85 fclose(f1);
|
|
86 fclose(f2);
|
|
87 // store PEQ freqs--------
|
|
88 fprintf(f3,"freqs of words that appear\nTimes\tnWords\n");
|
|
89 for (i=0;i<PEQ;i++)
|
|
90 if (freq[i]) fprintf(f3,"%" PRId64 "\t%" PRId64 "\n",i,freq[i]);
|
|
91
|
|
92 fclose(f3);
|
|
93 fprintf(stdout,"Num.Words=%" PRIu64 " MaxFreq=%" PRIu64 " TotRepeat=%" PRIu64 " AveragFreq=%f\n",nW,maxF,aveF, (float)aveF/(float)nW);
|
|
94
|
|
95 exit(0);
|
|
96 }
|
|
97
|
|
98
|
|
99
|