PocketSphinx  5prealpha
ptm_mgau.c
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2010 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
38 /* System headers */
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <assert.h>
43 #include <limits.h>
44 #include <math.h>
45 #if defined(__ADSPBLACKFIN__)
46 #elif !defined(_WIN32_WCE)
47 #include <sys/types.h>
48 #endif
49 
50 /* SphinxBase headers */
51 #include <sphinx_config.h>
52 #include <sphinxbase/cmd_ln.h>
53 #include <sphinxbase/fixpoint.h>
54 #include <sphinxbase/ckd_alloc.h>
55 #include <sphinxbase/bio.h>
56 #include <sphinxbase/err.h>
57 #include <sphinxbase/prim_type.h>
58 
59 /* Local headers */
60 #include "tied_mgau_common.h"
61 #include "ptm_mgau.h"
62 
63 static ps_mgaufuncs_t ptm_mgau_funcs = {
64  "ptm",
65  ptm_mgau_frame_eval, /* frame_eval */
66  ptm_mgau_mllr_transform, /* transform */
67  ptm_mgau_free /* free */
68 };
69 
70 #define COMPUTE_GMM_MAP(_idx) \
71  diff[_idx] = obs[_idx] - mean[_idx]; \
72  sqdiff[_idx] = MFCCMUL(diff[_idx], diff[_idx]); \
73  compl[_idx] = MFCCMUL(sqdiff[_idx], var[_idx]);
74 #define COMPUTE_GMM_REDUCE(_idx) \
75  d = GMMSUB(d, compl[_idx]);
76 
77 static void
78 insertion_sort_topn(ptm_topn_t *topn, int i, int32 d)
79 {
80  ptm_topn_t vtmp;
81  int j;
82 
83  topn[i].score = d;
84  if (i == 0)
85  return;
86  vtmp = topn[i];
87  for (j = i - 1; j >= 0 && d > topn[j].score; j--) {
88  topn[j + 1] = topn[j];
89  }
90  topn[j + 1] = vtmp;
91 }
92 
93 static int
94 eval_topn(ptm_mgau_t *s, int cb, int feat, mfcc_t *z)
95 {
96  ptm_topn_t *topn;
97  int i, ceplen;
98 
99  topn = s->f->topn[cb][feat];
100  ceplen = s->g->featlen[feat];
101 
102  for (i = 0; i < s->max_topn; i++) {
103  mfcc_t *mean, diff[4], sqdiff[4], compl[4]; /* diff, diff^2, component likelihood */
104  mfcc_t *var, d;
105  mfcc_t *obs;
106  int32 cw, j;
107 
108  cw = topn[i].cw;
109  mean = s->g->mean[cb][feat][0] + cw * ceplen;
110  var = s->g->var[cb][feat][0] + cw * ceplen;
111  d = s->g->det[cb][feat][cw];
112  obs = z;
113  for (j = 0; j < ceplen % 4; ++j) {
114  diff[0] = *obs++ - *mean++;
115  sqdiff[0] = MFCCMUL(diff[0], diff[0]);
116  compl[0] = MFCCMUL(sqdiff[0], *var);
117  d = GMMSUB(d, compl[0]);
118  ++var;
119  }
120  /* We could vectorize this but it's unlikely to make much
121  * difference as the outer loop here isn't very big. */
122  for (;j < ceplen; j += 4) {
123  COMPUTE_GMM_MAP(0);
124  COMPUTE_GMM_MAP(1);
125  COMPUTE_GMM_MAP(2);
126  COMPUTE_GMM_MAP(3);
127  COMPUTE_GMM_REDUCE(0);
128  COMPUTE_GMM_REDUCE(1);
129  COMPUTE_GMM_REDUCE(2);
130  COMPUTE_GMM_REDUCE(3);
131  var += 4;
132  obs += 4;
133  mean += 4;
134  }
135  if (d < (mfcc_t)INT_MIN) /* Redundant if FIXED_POINT */
136  insertion_sort_topn(topn, i, INT_MIN);
137  else
138  insertion_sort_topn(topn, i, (int32)d);
139  }
140 
141  return topn[0].score;
142 }
143 
144 /* This looks bad, but it actually isn't. Less than 1% of eval_cb's
145  * time is spent doing this. */
146 static void
147 insertion_sort_cb(ptm_topn_t **cur, ptm_topn_t *worst, ptm_topn_t *best,
148  int cw, int32 intd)
149 {
150  for (*cur = worst - 1; *cur >= best && intd >= (*cur)->score; --*cur)
151  memcpy(*cur + 1, *cur, sizeof(**cur));
152  ++*cur;
153  (*cur)->cw = cw;
154  (*cur)->score = intd;
155 }
156 
157 static int
158 eval_cb(ptm_mgau_t *s, int cb, int feat, mfcc_t *z)
159 {
160  ptm_topn_t *worst, *best, *topn;
161  mfcc_t *mean;
162  mfcc_t *var, *det, *detP, *detE;
163  int32 i, ceplen;
164 
165  best = topn = s->f->topn[cb][feat];
166  worst = topn + (s->max_topn - 1);
167  mean = s->g->mean[cb][feat][0];
168  var = s->g->var[cb][feat][0];
169  det = s->g->det[cb][feat];
170  detE = det + s->g->n_density;
171  ceplen = s->g->featlen[feat];
172 
173  for (detP = det; detP < detE; ++detP) {
174  mfcc_t diff[4], sqdiff[4], compl[4]; /* diff, diff^2, component likelihood */
175  mfcc_t d, thresh;
176  mfcc_t *obs;
177  ptm_topn_t *cur;
178  int32 cw, j;
179 
180  d = *detP;
181  thresh = (mfcc_t) worst->score; /* Avoid int-to-float conversions */
182  obs = z;
183  cw = (int)(detP - det);
184 
185  /* Unroll the loop starting with the first dimension(s). In
186  * theory this might be a bit faster if this Gaussian gets
187  * "knocked out" by C0. In practice not. */
188  for (j = 0; (j < ceplen % 4) && (d >= thresh); ++j) {
189  diff[0] = *obs++ - *mean++;
190  sqdiff[0] = MFCCMUL(diff[0], diff[0]);
191  compl[0] = MFCCMUL(sqdiff[0], *var++);
192  d = GMMSUB(d, compl[0]);
193  }
194  /* Now do 4 dimensions at a time. You'd think that GCC would
195  * vectorize this? Apparently not. And it's right, because
196  * that won't make this any faster, at least on x86-64. */
197  for (; j < ceplen && d >= thresh; j += 4) {
198  COMPUTE_GMM_MAP(0);
199  COMPUTE_GMM_MAP(1);
200  COMPUTE_GMM_MAP(2);
201  COMPUTE_GMM_MAP(3);
202  COMPUTE_GMM_REDUCE(0);
203  COMPUTE_GMM_REDUCE(1);
204  COMPUTE_GMM_REDUCE(2);
205  COMPUTE_GMM_REDUCE(3);
206  var += 4;
207  obs += 4;
208  mean += 4;
209  }
210  if (j < ceplen) {
211  /* terminated early, so not in topn */
212  mean += (ceplen - j);
213  var += (ceplen - j);
214  continue;
215  }
216  if (d < thresh)
217  continue;
218  for (i = 0; i < s->max_topn; i++) {
219  /* already there, so don't need to insert */
220  if (topn[i].cw == cw)
221  break;
222  }
223  if (i < s->max_topn)
224  continue; /* already there. Don't insert */
225  if (d < (mfcc_t)INT_MIN) /* Redundant if FIXED_POINT */
226  insertion_sort_cb(&cur, worst, best, cw, INT_MIN);
227  else
228  insertion_sort_cb(&cur, worst, best, cw, (int32)d);
229  }
230 
231  return best->score;
232 }
233 
237 static int
238 ptm_mgau_codebook_eval(ptm_mgau_t *s, mfcc_t **z, int frame)
239 {
240  int i, j;
241 
242  /* First evaluate top-N from previous frame. */
243  for (i = 0; i < s->g->n_mgau; ++i)
244  for (j = 0; j < s->g->n_feat; ++j)
245  eval_topn(s, i, j, z[j]);
246 
247  /* If frame downsampling is in effect, possibly do nothing else. */
248  if (frame % s->ds_ratio)
249  return 0;
250 
251  /* Evaluate remaining codebooks. */
252  for (i = 0; i < s->g->n_mgau; ++i) {
253  if (bitvec_is_clear(s->f->mgau_active, i))
254  continue;
255  for (j = 0; j < s->g->n_feat; ++j) {
256  eval_cb(s, i, j, z[j]);
257  }
258  }
259  return 0;
260 }
261 
271 static int
272 ptm_mgau_codebook_norm(ptm_mgau_t *s, mfcc_t **z, int frame)
273 {
274  int i, j;
275 
276  for (j = 0; j < s->g->n_feat; ++j) {
277  int32 norm = WORST_SCORE;
278  for (i = 0; i < s->g->n_mgau; ++i) {
279  if (bitvec_is_clear(s->f->mgau_active, i))
280  continue;
281  if (norm < s->f->topn[i][j][0].score >> SENSCR_SHIFT)
282  norm = s->f->topn[i][j][0].score >> SENSCR_SHIFT;
283  }
284  for (i = 0; i < s->g->n_mgau; ++i) {
285  int32 k;
286  if (bitvec_is_clear(s->f->mgau_active, i))
287  continue;
288  for (k = 0; k < s->max_topn; ++k) {
289  s->f->topn[i][j][k].score >>= SENSCR_SHIFT;
290  s->f->topn[i][j][k].score -= norm;
291  s->f->topn[i][j][k].score = -s->f->topn[i][j][k].score;
292  if (s->f->topn[i][j][k].score > MAX_NEG_ASCR)
293  s->f->topn[i][j][k].score = MAX_NEG_ASCR;
294  }
295  }
296  }
297 
298  return 0;
299 }
300 
301 static int
302 ptm_mgau_calc_cb_active(ptm_mgau_t *s, uint8 *senone_active,
303  int32 n_senone_active, int compallsen)
304 {
305  int i, lastsen;
306 
307  if (compallsen) {
308  bitvec_set_all(s->f->mgau_active, s->g->n_mgau);
309  return 0;
310  }
311  bitvec_clear_all(s->f->mgau_active, s->g->n_mgau);
312  for (lastsen = i = 0; i < n_senone_active; ++i) {
313  int sen = senone_active[i] + lastsen;
314  int cb = s->sen2cb[sen];
315  bitvec_set(s->f->mgau_active, cb);
316  lastsen = sen;
317  }
318  E_DEBUG(1, ("Active codebooks:"));
319  for (i = 0; i < s->g->n_mgau; ++i) {
320  if (bitvec_is_clear(s->f->mgau_active, i))
321  continue;
322  E_DEBUGCONT(1, (" %d", i));
323  }
324  E_DEBUGCONT(1, ("\n"));
325  return 0;
326 }
327 
331 static int
332 ptm_mgau_senone_eval(ptm_mgau_t *s, int16 *senone_scores,
333  uint8 *senone_active, int32 n_senone_active,
334  int compall)
335 {
336  int i, lastsen, bestscore;
337 
338  memset(senone_scores, 0, s->n_sen * sizeof(*senone_scores));
339  /* FIXME: This is the non-cache-efficient way to do this. We want
340  * to evaluate one codeword at a time but this requires us to have
341  * a reverse codebook to senone mapping, which we don't have
342  * (yet), since different codebooks have different top-N
343  * codewords. */
344  if (compall)
345  n_senone_active = s->n_sen;
346  bestscore = 0x7fffffff;
347  for (lastsen = i = 0; i < n_senone_active; ++i) {
348  int sen, f, cb;
349  int ascore;
350 
351  if (compall)
352  sen = i;
353  else
354  sen = senone_active[i] + lastsen;
355  lastsen = sen;
356  cb = s->sen2cb[sen];
357 
358  if (bitvec_is_clear(s->f->mgau_active, cb)) {
359  int j;
360  /* Because senone_active is deltas we can't really "knock
361  * out" senones from pruned codebooks, and in any case,
362  * it wouldn't make any difference to the search code,
363  * which doesn't expect senone_active to change. */
364  for (f = 0; f < s->g->n_feat; ++f) {
365  for (j = 0; j < s->max_topn; ++j) {
366  s->f->topn[cb][f][j].score = MAX_NEG_ASCR;
367  }
368  }
369  }
370  /* For each feature, log-sum codeword scores + mixw to get
371  * feature density, then sum (multiply) to get ascore */
372  ascore = 0;
373  for (f = 0; f < s->g->n_feat; ++f) {
374  ptm_topn_t *topn;
375  int j, fden = 0;
376  topn = s->f->topn[cb][f];
377  for (j = 0; j < s->max_topn; ++j) {
378  int mixw;
379  /* Find mixture weight for this codeword. */
380  if (s->mixw_cb) {
381  int dcw = s->mixw[f][topn[j].cw][sen/2];
382  dcw = (dcw & 1) ? dcw >> 4 : dcw & 0x0f;
383  mixw = s->mixw_cb[dcw];
384  }
385  else {
386  mixw = s->mixw[f][topn[j].cw][sen];
387  }
388  if (j == 0)
389  fden = mixw + topn[j].score;
390  else
391  fden = fast_logmath_add(s->lmath_8b, fden,
392  mixw + topn[j].score);
393  E_DEBUG(3, ("fden[%d][%d] l+= %d + %d = %d\n",
394  sen, f, mixw, topn[j].score, fden));
395  }
396  ascore += fden;
397  }
398  if (ascore < bestscore) bestscore = ascore;
399  senone_scores[sen] = ascore;
400  }
401  /* Normalize the scores again (finishing the job we started above
402  * in ptm_mgau_codebook_eval...) */
403  for (i = 0; i < s->n_sen; ++i) {
404  senone_scores[i] -= bestscore;
405  }
406 
407  return 0;
408 }
409 
413 int32
415  int16 *senone_scores,
416  uint8 *senone_active,
417  int32 n_senone_active,
418  mfcc_t ** featbuf, int32 frame,
419  int32 compallsen)
420 {
421  ptm_mgau_t *s = (ptm_mgau_t *)ps;
422  int fast_eval_idx;
423 
424  /* Find the appropriate frame in the rotating history buffer
425  * corresponding to the requested input frame. No bounds checking
426  * is done here, which just means you'll get semi-random crap if
427  * you request a frame in the future or one that's too far in the
428  * past. Since the history buffer is just used for fast match
429  * that might not be fatal. */
430  fast_eval_idx = frame % s->n_fast_hist;
431  s->f = s->hist + fast_eval_idx;
432  /* Compute the top-N codewords for every codebook, unless this
433  * is a past frame, in which case we already have them (we
434  * hope!) */
435  if (frame >= ps_mgau_base(ps)->frame_idx) {
436  ptm_fast_eval_t *lastf;
437  /* Get the previous frame's top-N information (on the
438  * first frame of the input this is just all WORST_DIST,
439  * no harm in that) */
440  if (fast_eval_idx == 0)
441  lastf = s->hist + s->n_fast_hist - 1;
442  else
443  lastf = s->hist + fast_eval_idx - 1;
444  /* Copy in initial top-N info */
445  memcpy(s->f->topn[0][0], lastf->topn[0][0],
446  s->g->n_mgau * s->g->n_feat * s->max_topn * sizeof(ptm_topn_t));
447  /* Generate initial active codebook list (this might not be
448  * necessary) */
449  ptm_mgau_calc_cb_active(s, senone_active, n_senone_active, compallsen);
450  /* Now evaluate top-N, prune, and evaluate remaining codebooks. */
451  ptm_mgau_codebook_eval(s, featbuf, frame);
452  ptm_mgau_codebook_norm(s, featbuf, frame);
453  }
454  /* Evaluate intersection of active senones and active codebooks. */
455  ptm_mgau_senone_eval(s, senone_scores, senone_active,
456  n_senone_active, compallsen);
457 
458  return 0;
459 }
460 
461 static int32
462 read_sendump(ptm_mgau_t *s, bin_mdef_t *mdef, char const *file)
463 {
464  FILE *fp;
465  char line[1000];
466  int32 i, n, r, c;
467  int32 do_swap, do_mmap;
468  size_t offset;
469  int n_clust = 0;
470  int n_feat = s->g->n_feat;
471  int n_density = s->g->n_density;
472  int n_sen = bin_mdef_n_sen(mdef);
473  int n_bits = 8;
474 
475  s->n_sen = n_sen; /* FIXME: Should have been done earlier */
476  do_mmap = cmd_ln_boolean_r(s->config, "-mmap");
477 
478  if ((fp = fopen(file, "rb")) == NULL)
479  return -1;
480 
481  E_INFO("Loading senones from dump file %s\n", file);
482  /* Read title size, title */
483  if (fread(&n, sizeof(int32), 1, fp) != 1) {
484  E_ERROR_SYSTEM("Failed to read title size from %s", file);
485  goto error_out;
486  }
487  /* This is extremely bogus */
488  do_swap = 0;
489  if (n < 1 || n > 999) {
490  SWAP_INT32(&n);
491  if (n < 1 || n > 999) {
492  E_ERROR("Title length %x in dump file %s out of range\n", n, file);
493  goto error_out;
494  }
495  do_swap = 1;
496  }
497  if (fread(line, sizeof(char), n, fp) != n) {
498  E_ERROR_SYSTEM("Cannot read title");
499  goto error_out;
500  }
501  if (line[n - 1] != '\0') {
502  E_ERROR("Bad title in dump file\n");
503  goto error_out;
504  }
505  E_INFO("%s\n", line);
506 
507  /* Read header size, header */
508  if (fread(&n, sizeof(n), 1, fp) != 1) {
509  E_ERROR_SYSTEM("Failed to read header size from %s", file);
510  goto error_out;
511  }
512  if (do_swap) SWAP_INT32(&n);
513  if (fread(line, sizeof(char), n, fp) != n) {
514  E_ERROR_SYSTEM("Cannot read header");
515  goto error_out;
516  }
517  if (line[n - 1] != '\0') {
518  E_ERROR("Bad header in dump file\n");
519  goto error_out;
520  }
521 
522  /* Read other header strings until string length = 0 */
523  for (;;) {
524  if (fread(&n, sizeof(n), 1, fp) != 1) {
525  E_ERROR_SYSTEM("Failed to read header string size from %s", file);
526  goto error_out;
527  }
528  if (do_swap) SWAP_INT32(&n);
529  if (n == 0)
530  break;
531  if (fread(line, sizeof(char), n, fp) != n) {
532  E_ERROR_SYSTEM("Cannot read header");
533  goto error_out;
534  }
535  /* Look for a cluster count, if present */
536  if (!strncmp(line, "feature_count ", strlen("feature_count "))) {
537  n_feat = atoi(line + strlen("feature_count "));
538  }
539  if (!strncmp(line, "mixture_count ", strlen("mixture_count "))) {
540  n_density = atoi(line + strlen("mixture_count "));
541  }
542  if (!strncmp(line, "model_count ", strlen("model_count "))) {
543  n_sen = atoi(line + strlen("model_count "));
544  }
545  if (!strncmp(line, "cluster_count ", strlen("cluster_count "))) {
546  n_clust = atoi(line + strlen("cluster_count "));
547  }
548  if (!strncmp(line, "cluster_bits ", strlen("cluster_bits "))) {
549  n_bits = atoi(line + strlen("cluster_bits "));
550  }
551  }
552 
553  /* Defaults for #rows, #columns in mixw array. */
554  c = n_sen;
555  r = n_density;
556  if (n_clust == 0) {
557  /* Older mixw files have them here, and they might be padded. */
558  if (fread(&r, sizeof(r), 1, fp) != 1) {
559  E_ERROR_SYSTEM("Cannot read #rows");
560  goto error_out;
561  }
562  if (do_swap) SWAP_INT32(&r);
563  if (fread(&c, sizeof(c), 1, fp) != 1) {
564  E_ERROR_SYSTEM("Cannot read #columns");
565  goto error_out;
566  }
567  if (do_swap) SWAP_INT32(&c);
568  E_INFO("Rows: %d, Columns: %d\n", r, c);
569  }
570 
571  if (n_feat != s->g->n_feat) {
572  E_ERROR("Number of feature streams mismatch: %d != %d\n",
573  n_feat, s->g->n_feat);
574  goto error_out;
575  }
576  if (n_density != s->g->n_density) {
577  E_ERROR("Number of densities mismatch: %d != %d\n",
578  n_density, s->g->n_density);
579  goto error_out;
580  }
581  if (n_sen != s->n_sen) {
582  E_ERROR("Number of senones mismatch: %d != %d\n",
583  n_sen, s->n_sen);
584  goto error_out;
585  }
586 
587  if (!((n_clust == 0) || (n_clust == 15) || (n_clust == 16))) {
588  E_ERROR("Cluster count must be 0, 15, or 16\n");
589  goto error_out;
590  }
591  if (n_clust == 15)
592  ++n_clust;
593 
594  if (!((n_bits == 8) || (n_bits == 4))) {
595  E_ERROR("Cluster count must be 4 or 8\n");
596  goto error_out;
597  }
598 
599  if (do_mmap) {
600  E_INFO("Using memory-mapped I/O for senones\n");
601  }
602  offset = ftell(fp);
603 
604  /* Allocate memory for pdfs (or memory map them) */
605  if (do_mmap) {
606  s->sendump_mmap = mmio_file_read(file);
607  /* Get cluster codebook if any. */
608  if (n_clust) {
609  s->mixw_cb = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset;
610  offset += n_clust;
611  }
612  }
613  else {
614  /* Get cluster codebook if any. */
615  if (n_clust) {
616  s->mixw_cb = ckd_calloc(1, n_clust);
617  if (fread(s->mixw_cb, 1, n_clust, fp) != (size_t) n_clust) {
618  E_ERROR("Failed to read %d bytes from sendump\n", n_clust);
619  goto error_out;
620  }
621  }
622  }
623 
624  /* Set up pointers, or read, or whatever */
625  if (s->sendump_mmap) {
626  s->mixw = ckd_calloc_2d(n_feat, n_density, sizeof(*s->mixw));
627  for (n = 0; n < n_feat; n++) {
628  int step = c;
629  if (n_bits == 4)
630  step = (step + 1) / 2;
631  for (i = 0; i < r; i++) {
632  s->mixw[n][i] = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset;
633  offset += step;
634  }
635  }
636  }
637  else {
638  s->mixw = ckd_calloc_3d(n_feat, n_density, n_sen, sizeof(***s->mixw));
639  /* Read pdf values and ids */
640  for (n = 0; n < n_feat; n++) {
641  int step = c;
642  if (n_bits == 4)
643  step = (step + 1) / 2;
644  for (i = 0; i < r; i++) {
645  if (fread(s->mixw[n][i], sizeof(***s->mixw), step, fp)
646  != (size_t) step) {
647  E_ERROR("Failed to read %d bytes from sendump\n", step);
648  goto error_out;
649  }
650  }
651  }
652  }
653 
654  fclose(fp);
655  return 0;
656 error_out:
657  fclose(fp);
658  return -1;
659 }
660 
661 static int32
662 read_mixw(ptm_mgau_t * s, char const *file_name, double SmoothMin)
663 {
664  char **argname, **argval;
665  char eofchk;
666  FILE *fp;
667  int32 byteswap, chksum_present;
668  uint32 chksum;
669  float32 *pdf;
670  int32 i, f, c, n;
671  int32 n_sen;
672  int32 n_feat;
673  int32 n_comp;
674  int32 n_err;
675 
676  E_INFO("Reading mixture weights file '%s'\n", file_name);
677 
678  if ((fp = fopen(file_name, "rb")) == NULL)
679  E_FATAL_SYSTEM("Failed to open mixture file '%s' for reading", file_name);
680 
681  /* Read header, including argument-value info and 32-bit byteorder magic */
682  if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
683  E_FATAL("Failed to read header from '%s'\n", file_name);
684 
685  /* Parse argument-value list */
686  chksum_present = 0;
687  for (i = 0; argname[i]; i++) {
688  if (strcmp(argname[i], "version") == 0) {
689  if (strcmp(argval[i], MGAU_MIXW_VERSION) != 0)
690  E_WARN("Version mismatch(%s): %s, expecting %s\n",
691  file_name, argval[i], MGAU_MIXW_VERSION);
692  }
693  else if (strcmp(argname[i], "chksum0") == 0) {
694  chksum_present = 1; /* Ignore the associated value */
695  }
696  }
697  bio_hdrarg_free(argname, argval);
698  argname = argval = NULL;
699 
700  chksum = 0;
701 
702  /* Read #senones, #features, #codewords, arraysize */
703  if ((bio_fread(&n_sen, sizeof(int32), 1, fp, byteswap, &chksum) != 1)
704  || (bio_fread(&n_feat, sizeof(int32), 1, fp, byteswap, &chksum) !=
705  1)
706  || (bio_fread(&n_comp, sizeof(int32), 1, fp, byteswap, &chksum) !=
707  1)
708  || (bio_fread(&n, sizeof(int32), 1, fp, byteswap, &chksum) != 1)) {
709  E_FATAL("bio_fread(%s) (arraysize) failed\n", file_name);
710  }
711  if (n_feat != s->g->n_feat)
712  E_FATAL("#Features streams(%d) != %d\n", n_feat, s->g->n_feat);
713  if (n != n_sen * n_feat * n_comp) {
714  E_FATAL
715  ("%s: #float32s(%d) doesn't match header dimensions: %d x %d x %d\n",
716  file_name, i, n_sen, n_feat, n_comp);
717  }
718 
719  /* n_sen = number of mixture weights per codeword, which is
720  * fixed at the number of senones since we have only one codebook.
721  */
722  s->n_sen = n_sen;
723 
724  /* Quantized mixture weight arrays. */
725  s->mixw = ckd_calloc_3d(s->g->n_feat, s->g->n_density,
726  n_sen, sizeof(***s->mixw));
727 
728  /* Temporary structure to read in floats before conversion to (int32) logs3 */
729  pdf = (float32 *) ckd_calloc(n_comp, sizeof(float32));
730 
731  /* Read senone probs data, normalize, floor, convert to logs3, truncate to 8 bits */
732  n_err = 0;
733  for (i = 0; i < n_sen; i++) {
734  for (f = 0; f < n_feat; f++) {
735  if (bio_fread((void *) pdf, sizeof(float32),
736  n_comp, fp, byteswap, &chksum) != n_comp) {
737  E_FATAL("bio_fread(%s) (arraydata) failed\n", file_name);
738  }
739 
740  /* Normalize and floor */
741  if (vector_sum_norm(pdf, n_comp) <= 0.0)
742  n_err++;
743  vector_floor(pdf, n_comp, SmoothMin);
744  vector_sum_norm(pdf, n_comp);
745 
746  /* Convert to LOG, quantize, and transpose */
747  for (c = 0; c < n_comp; c++) {
748  int32 qscr;
749 
750  qscr = -logmath_log(s->lmath_8b, pdf[c]);
751  if ((qscr > MAX_NEG_MIXW) || (qscr < 0))
752  qscr = MAX_NEG_MIXW;
753  s->mixw[f][c][i] = qscr;
754  }
755  }
756  }
757  if (n_err > 0)
758  E_WARN("Weight normalization failed for %d mixture weights components\n", n_err);
759 
760  ckd_free(pdf);
761 
762  if (chksum_present)
763  bio_verify_chksum(fp, byteswap, chksum);
764 
765  if (fread(&eofchk, 1, 1, fp) == 1)
766  E_FATAL("More data than expected in %s\n", file_name);
767 
768  fclose(fp);
769 
770  E_INFO("Read %d x %d x %d mixture weights\n", n_sen, n_feat, n_comp);
771  return n_sen;
772 }
773 
774 ps_mgau_t *
775 ptm_mgau_init(acmod_t *acmod, bin_mdef_t *mdef)
776 {
777  ptm_mgau_t *s;
778  ps_mgau_t *ps;
779  char const *sendump_path;
780  int i;
781 
782  s = ckd_calloc(1, sizeof(*s));
783  s->config = acmod->config;
784 
785  s->lmath = logmath_retain(acmod->lmath);
786  /* Log-add table. */
787  s->lmath_8b = logmath_init(logmath_get_base(acmod->lmath), SENSCR_SHIFT, TRUE);
788  if (s->lmath_8b == NULL)
789  goto error_out;
790  /* Ensure that it is only 8 bits wide so that fast_logmath_add() works. */
791  if (logmath_get_width(s->lmath_8b) != 1) {
792  E_ERROR("Log base %f is too small to represent add table in 8 bits\n",
793  logmath_get_base(s->lmath_8b));
794  goto error_out;
795  }
796 
797  /* Read means and variances. */
798  if ((s->g = gauden_init(cmd_ln_str_r(s->config, "_mean"),
799  cmd_ln_str_r(s->config, "_var"),
800  cmd_ln_float32_r(s->config, "-varfloor"),
801  s->lmath)) == NULL) {
802  E_ERROR("Failed to read means and variances\n");
803  goto error_out;
804  }
805 
806  /* We only support 256 codebooks or less (like 640k or 2GB, this
807  * should be enough for anyone) */
808  if (s->g->n_mgau > 256) {
809  E_INFO("Number of codebooks exceeds 256: %d\n", s->g->n_mgau);
810  goto error_out;
811  }
812  if (s->g->n_mgau != bin_mdef_n_ciphone(mdef)) {
813  E_INFO("Number of codebooks doesn't match number of ciphones, doesn't look like PTM: %d != %d\n", s->g->n_mgau, bin_mdef_n_ciphone(mdef));
814  goto error_out;
815  }
816  /* Verify n_feat and veclen, against acmod. */
817  if (s->g->n_feat != feat_dimension1(acmod->fcb)) {
818  E_ERROR("Number of streams does not match: %d != %d\n",
819  s->g->n_feat, feat_dimension1(acmod->fcb));
820  goto error_out;
821  }
822  for (i = 0; i < s->g->n_feat; ++i) {
823  if (s->g->featlen[i] != feat_dimension2(acmod->fcb, i)) {
824  E_ERROR("Dimension of stream %d does not match: %d != %d\n",
825  s->g->featlen[i], feat_dimension2(acmod->fcb, i));
826  goto error_out;
827  }
828  }
829  /* Read mixture weights. */
830  if ((sendump_path = cmd_ln_str_r(s->config, "_sendump"))) {
831  if (read_sendump(s, acmod->mdef, sendump_path) < 0) {
832  goto error_out;
833  }
834  }
835  else {
836  if (read_mixw(s, cmd_ln_str_r(s->config, "_mixw"),
837  cmd_ln_float32_r(s->config, "-mixwfloor")) < 0) {
838  goto error_out;
839  }
840  }
841  s->ds_ratio = cmd_ln_int32_r(s->config, "-ds");
842  s->max_topn = cmd_ln_int32_r(s->config, "-topn");
843  E_INFO("Maximum top-N: %d\n", s->max_topn);
844 
845  /* Assume mapping of senones to their base phones, though this
846  * will become more flexible in the future. */
847  s->sen2cb = ckd_calloc(s->n_sen, sizeof(*s->sen2cb));
848  for (i = 0; i < s->n_sen; ++i)
849  s->sen2cb[i] = bin_mdef_sen2cimap(acmod->mdef, i);
850 
851  /* Allocate fast-match history buffers. We need enough for the
852  * phoneme lookahead window, plus the current frame, plus one for
853  * good measure? (FIXME: I don't remember why) */
854  s->n_fast_hist = cmd_ln_int32_r(s->config, "-pl_window") + 2;
855  s->hist = ckd_calloc(s->n_fast_hist, sizeof(*s->hist));
856  /* s->f will be a rotating pointer into s->hist. */
857  s->f = s->hist;
858  for (i = 0; i < s->n_fast_hist; ++i) {
859  int j, k, m;
860  /* Top-N codewords for every codebook and feature. */
861  s->hist[i].topn = ckd_calloc_3d(s->g->n_mgau, s->g->n_feat,
862  s->max_topn, sizeof(ptm_topn_t));
863  /* Initialize them to sane (yet arbitrary) defaults. */
864  for (j = 0; j < s->g->n_mgau; ++j) {
865  for (k = 0; k < s->g->n_feat; ++k) {
866  for (m = 0; m < s->max_topn; ++m) {
867  s->hist[i].topn[j][k][m].cw = m;
868  s->hist[i].topn[j][k][m].score = WORST_DIST;
869  }
870  }
871  }
872  /* Active codebook mapping (just codebook, not features,
873  at least not yet) */
874  s->hist[i].mgau_active = bitvec_alloc(s->g->n_mgau);
875  /* Start with them all on, prune them later. */
876  bitvec_set_all(s->hist[i].mgau_active, s->g->n_mgau);
877  }
878 
879  ps = (ps_mgau_t *)s;
880  ps->vt = &ptm_mgau_funcs;
881  return ps;
882 error_out:
883  ptm_mgau_free(ps_mgau_base(s));
884  return NULL;
885 }
886 
887 int
888 ptm_mgau_mllr_transform(ps_mgau_t *ps,
889  ps_mllr_t *mllr)
890 {
891  ptm_mgau_t *s = (ptm_mgau_t *)ps;
892  return gauden_mllr_transform(s->g, mllr, s->config);
893 }
894 
895 void
896 ptm_mgau_free(ps_mgau_t *ps)
897 {
898  int i;
899  ptm_mgau_t *s = (ptm_mgau_t *)ps;
900 
901  logmath_free(s->lmath);
902  logmath_free(s->lmath_8b);
903  if (s->sendump_mmap) {
904  ckd_free_2d(s->mixw);
905  mmio_file_unmap(s->sendump_mmap);
906  }
907  else {
908  ckd_free_3d(s->mixw);
909  }
910  ckd_free(s->sen2cb);
911 
912  for (i = 0; i < s->n_fast_hist; i++) {
913  ckd_free_3d(s->hist[i].topn);
914  bitvec_free(s->hist[i].mgau_active);
915  }
916  ckd_free(s->hist);
917 
918  gauden_free(s->g);
919  ckd_free(s);
920 }
#define WORST_SCORE
Large "bad" score.
Definition: hmm.h:84
#define SENSCR_SHIFT
Shift count for senone scores.
Definition: hmm.h:73
void gauden_free(gauden_t *g)
Release memory allocated by gauden_init.
Definition: ms_gauden.c:358
int32 gauden_mllr_transform(gauden_t *s, ps_mllr_t *mllr, cmd_ln_t *config)
Transform Gaussians according to an MLLR matrix (or, eventually, more).
Definition: ms_gauden.c:509
gauden_t * gauden_init(char const *meanfile, char const *varfile, float32 varfloor, logmath_t *lmath)
Read mixture gaussian codebooks from the given files.
Definition: ms_gauden.c:311
Fast phonetically-tied mixture evaluation.
int ptm_mgau_frame_eval(ps_mgau_t *s, int16 *senone_scores, uint8 *senone_active, int32 n_senone_active, mfcc_t **featbuf, int32 frame, int32 compallsen)
Compute senone scores for the active senones.
Definition: ptm_mgau.c:414
Acoustic model structure.
Definition: acmod.h:148
bin_mdef_t * mdef
Model definition.
Definition: acmod.h:159
cmd_ln_t * config
Configuration.
Definition: acmod.h:150
feat_t * fcb
Dynamic feature computation.
Definition: acmod.h:156
logmath_t * lmath
Log-math computation.
Definition: acmod.h:151
mfcc_t **** var
like mean; diagonal covariance vector only
Definition: ms_gauden.h:84
mfcc_t *** det
log(determinant) for each variance vector; actually, log(sqrt(2*pi*det))
Definition: ms_gauden.h:85
int32 n_feat
Number feature streams in each codebook.
Definition: ms_gauden.h:89
mfcc_t **** mean
mean[codebook][feature][codeword] vector
Definition: ms_gauden.h:83
int32 n_density
Number gaussian densities in each codebook-feature stream.
Definition: ms_gauden.h:90
int32 * featlen
feature length for each feature
Definition: ms_gauden.h:91
int32 n_mgau
Number codebooks.
Definition: ms_gauden.h:88
ps_mgaufuncs_t * vt
vtable of mgau functions.
Definition: acmod.h:114
Feature space linear transform structure.
Definition: acmod.h:82
ptm_topn_t *** topn
Top-N for each codebook (mgau x feature x topn)
Definition: ptm_mgau.h:64
bitvec_t * mgau_active
Set of active codebooks.
Definition: ptm_mgau.h:65
uint8 * sen2cb
Senone to codebook mapping.
Definition: ptm_mgau.h:73
ptm_fast_eval_t * hist
Fast evaluation info for past frames.
Definition: ptm_mgau.h:80
cmd_ln_t * config
Configuration parameters.
Definition: ptm_mgau.h:70
ptm_fast_eval_t * f
Fast eval info for current frame.
Definition: ptm_mgau.h:81
int32 n_sen
Number of senones.
Definition: ptm_mgau.h:72
int n_fast_hist
Number of past frames tracked.
Definition: ptm_mgau.h:82
gauden_t * g
Set of Gaussians.
Definition: ptm_mgau.h:71
uint8 *** mixw
Mixture weight distributions by feature, codeword, senone.
Definition: ptm_mgau.h:74
int32 cw
Codeword index.
Definition: ptm_mgau.h:59
int32 score
Score.
Definition: ptm_mgau.h:60
Common code shared between SC and PTM (tied-state) models.
#define GMMSUB(a, b)
Subtract GMM component b (assumed to be positive) and saturate.
LOGMATH_INLINE int fast_logmath_add(logmath_t *lmath, int mlx, int mly)
Quickly log-add two negated log probabilities.
#define MAX_NEG_ASCR
Maximum negated acoustic score value.
#define MAX_NEG_MIXW
Maximum negated mixture weight value.