annotate gecko/src/hdStat.c @ 11:cf4c0c822ca9 draft

Uploaded
author bitlab
date Wed, 18 Nov 2020 08:37:31 +0000
parents 9db88f0f32b7
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
1 /* leehd reads and displays the hash table from disk
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
2 Syntax: leehd prefixNameOUT
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
3
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
4 prefixNameOUT.d2hW : index of words-Pos-Occurrences
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
5 prefixNameOUT.d2hP : positions
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
6 both must be available
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
7
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
8 Any char as third argument means "Verbose mode"
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
9 Feb.2012: computes word frequencies
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
10
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
11 ortrelles@uma.es / Dic.2011
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
12 ---------------------------------------------------------*/
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
13
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
14 #include <stdio.h>
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
15 #include <stdlib.h>
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
16 #include <string.h>
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
17 #include <stdlib.h>
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
18 #include <errno.h>
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
19 #include <inttypes.h>
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
20
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
21 #include "structs.h"
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
22 #include "commonFunctions.h"
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
23 #include "dictionaryFunctions.h"
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
24
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
25 #define PEQ 1001
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
26
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
27 int main(int ac, char** av){
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
28
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
29 char fname[1024], *W;
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
30 W=(char *)malloc(33*sizeof(char));
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
31 FILE *f1, *f2, *f3;
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
32 hashentry he;
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
33 uint64_t i=0;
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
34 location spos;
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
35 uint64_t nW=0,maxF=0, aveF=0;
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
36 int flagV=0;
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
37 int64_t freq[PEQ];
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
38
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
39 if(ac<2)terror("USE: leehd prefixNameOUT [v=verbose]\n");
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
40 if (ac==3) flagV=1;
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
41 for (i=0;i<PEQ;i++) freq[i]=0;
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
42 sprintf(fname,"%s.d2hW",av[1]); // Words file (first level of hash table)
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
43 if ((f1 = fopen(fname,"rb"))==NULL) terror("opening prefix.h2dW file");
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
44 sprintf(fname,"%s.d2hP",av[1]); // Positions file
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
45 if ((f2 = fopen(fname,"rb"))==NULL) terror("opening prefix.h2dP file");
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
46
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
47 sprintf(fname,"%s.freq",av[1]); // output
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
48 if ((f3 = fopen(fname,"wt"))==NULL) terror("opening prefix.freq OUT file");
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
49
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
50 // kick-off
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
51 if(fread(&he,sizeof(hashentry),1,f1)!=1)
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
52 terror("Empty dictionary");
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
53
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
54 while(!feof(f1)){
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
55
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
56 if (flagV) {showWord(&he.w, W);fprintf(stdout, "%.32s", W);}
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
57 if (flagV) fprintf(stdout," : num=%-7" PRIu64 ":",he.num);
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
58 if (he.num>=PEQ) {
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
59 fprintf(f3, "%" PRIu64 "\t", he.num);
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
60 showWord(&he.w, W);
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
61 fprintf(f3, "%.32s", W);
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
62 fprintf(f3, "%" PRIu64 "\n", he.num);
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
63 }
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
64 else freq[he.num]++;
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
65 nW++;
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
66 if (he.num>maxF) maxF=he.num;
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
67 aveF+=he.num;
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
68
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
69 fseek(f2,0, he.pos);
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
70 if (flagV) {
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
71
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
72 for (i=0;i<he.num;i++){
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
73 if(fread(&spos,sizeof(location),1,f2)!=1)
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
74 terror("Error reading the word occurrences");
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
75 fprintf(stdout,"(%" PRIu64 ",%" PRIu64 ") ",spos.pos,spos.seq);
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
76 }
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
77 fprintf(stdout,"\n");
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
78 }
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
79 if(fread(&he,sizeof(hashentry),1,f1)!=1)
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
80 if(ferror(f1))
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
81 terror("Error reading a dictionary entry");
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
82 }
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
83 free(W);
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
84
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
85 fclose(f1);
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
86 fclose(f2);
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
87 // store PEQ freqs--------
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
88 fprintf(f3,"freqs of words that appear\nTimes\tnWords\n");
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
89 for (i=0;i<PEQ;i++)
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
90 if (freq[i]) fprintf(f3,"%" PRId64 "\t%" PRId64 "\n",i,freq[i]);
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
91
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
92 fclose(f3);
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
93 fprintf(stdout,"Num.Words=%" PRIu64 " MaxFreq=%" PRIu64 " TotRepeat=%" PRIu64 " AveragFreq=%f\n",nW,maxF,aveF, (float)aveF/(float)nW);
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
94
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
95 exit(0);
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
96 }
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
97
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
98
9db88f0f32b7 Uploaded
bitlab
parents:
diff changeset
99