45 #if defined(__ADSPBLACKFIN__)
46 #elif !defined(_WIN32_WCE)
47 #include <sys/types.h>
51 #include <sphinx_config.h>
52 #include <sphinxbase/cmd_ln.h>
53 #include <sphinxbase/fixpoint.h>
54 #include <sphinxbase/ckd_alloc.h>
55 #include <sphinxbase/bio.h>
56 #include <sphinxbase/err.h>
57 #include <sphinxbase/prim_type.h>
66 ptm_mgau_mllr_transform,
/*
 * COMPUTE_GMM_MAP(_idx): per-element step of a Gaussian evaluation.
 * For element _idx it stores obs - mean in diff, the MFCCMUL square of
 * that difference in sqdiff, and the MFCCMUL product of the square with
 * var[_idx] in compl.
 *
 * Not self-contained: obs, mean, var, diff, sqdiff and compl must all be
 * in scope where the macro is expanded.  It expands to three bare
 * statements (no do { } while (0) wrapper), so it must not be used as the
 * unbraced body of an if/else or loop.
 */
70 #define COMPUTE_GMM_MAP(_idx) \
71 diff[_idx] = obs[_idx] - mean[_idx]; \
72 sqdiff[_idx] = MFCCMUL(diff[_idx], diff[_idx]); \
73 compl[_idx] = MFCCMUL(sqdiff[_idx], var[_idx]);
/*
 * COMPUTE_GMM_REDUCE(_idx): fold the term compl[_idx] into the running
 * value d by assigning d = GMMSUB(d, compl[_idx]).
 *
 * Relies on d and compl being in scope at the expansion site.  GMMSUB is
 * defined elsewhere (NOTE(review): documented as a saturating subtraction;
 * confirm against its definition).  The expansion already ends in ';'.
 */
74 #define COMPUTE_GMM_REDUCE(_idx) \
75 d = GMMSUB(d, compl[_idx]);
78 insertion_sort_topn(
ptm_topn_t *topn,
int i, int32 d)
87 for (j = i - 1; j >= 0 && d > topn[j].
score; j--) {
88 topn[j + 1] = topn[j];
94 eval_topn(
ptm_mgau_t *s,
int cb,
int feat, mfcc_t *z)
99 topn = s->
f->
topn[cb][feat];
102 for (i = 0; i < s->max_topn; i++) {
103 mfcc_t *mean, diff[4], sqdiff[4], compl[4];
109 mean = s->
g->
mean[cb][feat][0] + cw * ceplen;
110 var = s->
g->
var[cb][feat][0] + cw * ceplen;
111 d = s->
g->
det[cb][feat][cw];
113 for (j = 0; j < ceplen % 4; ++j) {
114 diff[0] = *obs++ - *mean++;
115 sqdiff[0] = MFCCMUL(diff[0], diff[0]);
116 compl[0] = MFCCMUL(sqdiff[0], *var);
122 for (;j < ceplen; j += 4) {
127 COMPUTE_GMM_REDUCE(0);
128 COMPUTE_GMM_REDUCE(1);
129 COMPUTE_GMM_REDUCE(2);
130 COMPUTE_GMM_REDUCE(3);
135 if (d < (mfcc_t)INT_MIN)
136 insertion_sort_topn(topn, i, INT_MIN);
138 insertion_sort_topn(topn, i, (int32)d);
141 return topn[0].
score;
150 for (*cur = worst - 1; *cur >= best && intd >= (*cur)->
score; --*cur)
151 memcpy(*cur + 1, *cur,
sizeof(**cur));
154 (*cur)->score = intd;
158 eval_cb(
ptm_mgau_t *s,
int cb,
int feat, mfcc_t *z)
162 mfcc_t *var, *det, *detP, *detE;
165 best = topn = s->
f->
topn[cb][feat];
166 worst = topn + (s->max_topn - 1);
167 mean = s->
g->
mean[cb][feat][0];
168 var = s->
g->
var[cb][feat][0];
169 det = s->
g->
det[cb][feat];
173 for (detP = det; detP < detE; ++detP) {
174 mfcc_t diff[4], sqdiff[4], compl[4];
181 thresh = (mfcc_t) worst->
score;
183 cw = (
int)(detP - det);
188 for (j = 0; (j < ceplen % 4) && (d >= thresh); ++j) {
189 diff[0] = *obs++ - *mean++;
190 sqdiff[0] = MFCCMUL(diff[0], diff[0]);
191 compl[0] = MFCCMUL(sqdiff[0], *var++);
197 for (; j < ceplen && d >= thresh; j += 4) {
202 COMPUTE_GMM_REDUCE(0);
203 COMPUTE_GMM_REDUCE(1);
204 COMPUTE_GMM_REDUCE(2);
205 COMPUTE_GMM_REDUCE(3);
212 mean += (ceplen - j);
218 for (i = 0; i < s->max_topn; i++) {
220 if (topn[i].cw == cw)
225 if (d < (mfcc_t)INT_MIN)
226 insertion_sort_cb(&cur, worst, best, cw, INT_MIN);
228 insertion_sort_cb(&cur, worst, best, cw, (int32)d);
238 ptm_mgau_codebook_eval(
ptm_mgau_t *s, mfcc_t **z,
int frame)
243 for (i = 0; i < s->
g->
n_mgau; ++i)
244 for (j = 0; j < s->
g->
n_feat; ++j)
245 eval_topn(s, i, j, z[j]);
248 if (frame % s->ds_ratio)
252 for (i = 0; i < s->
g->
n_mgau; ++i) {
255 for (j = 0; j < s->
g->
n_feat; ++j) {
256 eval_cb(s, i, j, z[j]);
272 ptm_mgau_codebook_norm(
ptm_mgau_t *s, mfcc_t **z,
int frame)
276 for (j = 0; j < s->
g->
n_feat; ++j) {
278 for (i = 0; i < s->
g->
n_mgau; ++i) {
284 for (i = 0; i < s->
g->
n_mgau; ++i) {
288 for (k = 0; k < s->max_topn; ++k) {
302 ptm_mgau_calc_cb_active(
ptm_mgau_t *s, uint8 *senone_active,
303 int32 n_senone_active,
int compallsen)
312 for (lastsen = i = 0; i < n_senone_active; ++i) {
313 int sen = senone_active[i] + lastsen;
318 E_DEBUG(1, (
"Active codebooks:"));
319 for (i = 0; i < s->
g->
n_mgau; ++i) {
322 E_DEBUGCONT(1, (
" %d", i));
324 E_DEBUGCONT(1, (
"\n"));
332 ptm_mgau_senone_eval(
ptm_mgau_t *s, int16 *senone_scores,
333 uint8 *senone_active, int32 n_senone_active,
336 int i, lastsen, bestscore;
338 memset(senone_scores, 0, s->
n_sen *
sizeof(*senone_scores));
345 n_senone_active = s->
n_sen;
346 bestscore = 0x7fffffff;
347 for (lastsen = i = 0; i < n_senone_active; ++i) {
354 sen = senone_active[i] + lastsen;
364 for (f = 0; f < s->
g->
n_feat; ++f) {
365 for (j = 0; j < s->max_topn; ++j) {
373 for (f = 0; f < s->
g->
n_feat; ++f) {
376 topn = s->
f->
topn[cb][f];
377 for (j = 0; j < s->max_topn; ++j) {
381 int dcw = s->
mixw[f][topn[j].
cw][sen/2];
382 dcw = (dcw & 1) ? dcw >> 4 : dcw & 0x0f;
383 mixw = s->mixw_cb[dcw];
386 mixw = s->
mixw[f][topn[j].
cw][sen];
389 fden = mixw + topn[j].
score;
392 mixw + topn[j].score);
393 E_DEBUG(3, (
"fden[%d][%d] l+= %d + %d = %d\n",
394 sen, f, mixw, topn[j].score, fden));
398 if (ascore < bestscore) bestscore = ascore;
399 senone_scores[sen] = ascore;
403 for (i = 0; i < s->
n_sen; ++i) {
404 senone_scores[i] -= bestscore;
415 int16 *senone_scores,
416 uint8 *senone_active,
417 int32 n_senone_active,
418 mfcc_t ** featbuf, int32 frame,
431 s->
f = s->
hist + fast_eval_idx;
435 if (frame >= ps_mgau_base(ps)->frame_idx) {
440 if (fast_eval_idx == 0)
443 lastf = s->
hist + fast_eval_idx - 1;
445 memcpy(s->
f->
topn[0][0], lastf->
topn[0][0],
449 ptm_mgau_calc_cb_active(s, senone_active, n_senone_active, compallsen);
451 ptm_mgau_codebook_eval(s, featbuf, frame);
452 ptm_mgau_codebook_norm(s, featbuf, frame);
455 ptm_mgau_senone_eval(s, senone_scores, senone_active,
456 n_senone_active, compallsen);
467 int32 do_swap, do_mmap;
472 int n_sen = bin_mdef_n_sen(mdef);
476 do_mmap = cmd_ln_boolean_r(s->
config,
"-mmap");
478 if ((fp = fopen(file,
"rb")) == NULL)
481 E_INFO(
"Loading senones from dump file %s\n", file);
483 if (fread(&n,
sizeof(int32), 1, fp) != 1) {
484 E_ERROR_SYSTEM(
"Failed to read title size from %s", file);
489 if (n < 1 || n > 999) {
491 if (n < 1 || n > 999) {
492 E_ERROR(
"Title length %x in dump file %s out of range\n", n, file);
497 if (fread(line,
sizeof(
char), n, fp) != n) {
498 E_ERROR_SYSTEM(
"Cannot read title");
501 if (line[n - 1] !=
'\0') {
502 E_ERROR(
"Bad title in dump file\n");
505 E_INFO(
"%s\n", line);
508 if (fread(&n,
sizeof(n), 1, fp) != 1) {
509 E_ERROR_SYSTEM(
"Failed to read header size from %s", file);
512 if (do_swap) SWAP_INT32(&n);
513 if (fread(line,
sizeof(
char), n, fp) != n) {
514 E_ERROR_SYSTEM(
"Cannot read header");
517 if (line[n - 1] !=
'\0') {
518 E_ERROR(
"Bad header in dump file\n");
524 if (fread(&n,
sizeof(n), 1, fp) != 1) {
525 E_ERROR_SYSTEM(
"Failed to read header string size from %s", file);
528 if (do_swap) SWAP_INT32(&n);
531 if (fread(line,
sizeof(
char), n, fp) != n) {
532 E_ERROR_SYSTEM(
"Cannot read header");
536 if (!strncmp(line,
"feature_count ", strlen(
"feature_count "))) {
537 n_feat = atoi(line + strlen(
"feature_count "));
539 if (!strncmp(line,
"mixture_count ", strlen(
"mixture_count "))) {
540 n_density = atoi(line + strlen(
"mixture_count "));
542 if (!strncmp(line,
"model_count ", strlen(
"model_count "))) {
543 n_sen = atoi(line + strlen(
"model_count "));
545 if (!strncmp(line,
"cluster_count ", strlen(
"cluster_count "))) {
546 n_clust = atoi(line + strlen(
"cluster_count "));
548 if (!strncmp(line,
"cluster_bits ", strlen(
"cluster_bits "))) {
549 n_bits = atoi(line + strlen(
"cluster_bits "));
558 if (fread(&r,
sizeof(r), 1, fp) != 1) {
559 E_ERROR_SYSTEM(
"Cannot read #rows");
562 if (do_swap) SWAP_INT32(&r);
563 if (fread(&c,
sizeof(c), 1, fp) != 1) {
564 E_ERROR_SYSTEM(
"Cannot read #columns");
567 if (do_swap) SWAP_INT32(&c);
568 E_INFO(
"Rows: %d, Columns: %d\n", r, c);
572 E_ERROR(
"Number of feature streams mismatch: %d != %d\n",
577 E_ERROR(
"Number of densities mismatch: %d != %d\n",
581 if (n_sen != s->
n_sen) {
582 E_ERROR(
"Number of senones mismatch: %d != %d\n",
587 if (!((n_clust == 0) || (n_clust == 15) || (n_clust == 16))) {
588 E_ERROR(
"Cluster count must be 0, 15, or 16\n");
594 if (!((n_bits == 8) || (n_bits == 4))) {
595 E_ERROR(
"Cluster count must be 4 or 8\n");
600 E_INFO(
"Using memory-mapped I/O for senones\n");
606 s->sendump_mmap = mmio_file_read(file);
609 s->mixw_cb = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset;
616 s->mixw_cb = ckd_calloc(1, n_clust);
617 if (fread(s->mixw_cb, 1, n_clust, fp) != (
size_t) n_clust) {
618 E_ERROR(
"Failed to read %d bytes from sendump\n", n_clust);
625 if (s->sendump_mmap) {
626 s->
mixw = ckd_calloc_2d(n_feat, n_density,
sizeof(*s->
mixw));
627 for (n = 0; n < n_feat; n++) {
630 step = (step + 1) / 2;
631 for (i = 0; i < r; i++) {
632 s->
mixw[n][i] = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset;
638 s->
mixw = ckd_calloc_3d(n_feat, n_density, n_sen,
sizeof(***s->
mixw));
640 for (n = 0; n < n_feat; n++) {
643 step = (step + 1) / 2;
644 for (i = 0; i < r; i++) {
645 if (fread(s->
mixw[n][i],
sizeof(***s->
mixw), step, fp)
647 E_ERROR(
"Failed to read %d bytes from sendump\n", step);
662 read_mixw(
ptm_mgau_t * s,
char const *file_name,
double SmoothMin)
664 char **argname, **argval;
667 int32 byteswap, chksum_present;
676 E_INFO(
"Reading mixture weights file '%s'\n", file_name);
678 if ((fp = fopen(file_name,
"rb")) == NULL)
679 E_FATAL_SYSTEM(
"Failed to open mixture file '%s' for reading", file_name);
682 if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
683 E_FATAL(
"Failed to read header from '%s'\n", file_name);
687 for (i = 0; argname[i]; i++) {
688 if (strcmp(argname[i],
"version") == 0) {
689 if (strcmp(argval[i], MGAU_MIXW_VERSION) != 0)
690 E_WARN(
"Version mismatch(%s): %s, expecting %s\n",
691 file_name, argval[i], MGAU_MIXW_VERSION);
693 else if (strcmp(argname[i],
"chksum0") == 0) {
697 bio_hdrarg_free(argname, argval);
698 argname = argval = NULL;
703 if ((bio_fread(&n_sen,
sizeof(int32), 1, fp, byteswap, &chksum) != 1)
704 || (bio_fread(&n_feat,
sizeof(int32), 1, fp, byteswap, &chksum) !=
706 || (bio_fread(&n_comp,
sizeof(int32), 1, fp, byteswap, &chksum) !=
708 || (bio_fread(&n,
sizeof(int32), 1, fp, byteswap, &chksum) != 1)) {
709 E_FATAL(
"bio_fread(%s) (arraysize) failed\n", file_name);
712 E_FATAL(
"#Features streams(%d) != %d\n", n_feat, s->
g->
n_feat);
713 if (n != n_sen * n_feat * n_comp) {
715 (
"%s: #float32s(%d) doesn't match header dimensions: %d x %d x %d\n",
716 file_name, i, n_sen, n_feat, n_comp);
726 n_sen,
sizeof(***s->
mixw));
729 pdf = (float32 *) ckd_calloc(n_comp,
sizeof(float32));
733 for (i = 0; i < n_sen; i++) {
734 for (f = 0; f < n_feat; f++) {
735 if (bio_fread((
void *) pdf,
sizeof(float32),
736 n_comp, fp, byteswap, &chksum) != n_comp) {
737 E_FATAL(
"bio_fread(%s) (arraydata) failed\n", file_name);
741 if (vector_sum_norm(pdf, n_comp) <= 0.0)
743 vector_floor(pdf, n_comp, SmoothMin);
744 vector_sum_norm(pdf, n_comp);
747 for (c = 0; c < n_comp; c++) {
750 qscr = -logmath_log(s->lmath_8b, pdf[c]);
753 s->
mixw[f][c][i] = qscr;
758 E_WARN(
"Weight normalization failed for %d mixture weights components\n", n_err);
763 bio_verify_chksum(fp, byteswap, chksum);
765 if (fread(&eofchk, 1, 1, fp) == 1)
766 E_FATAL(
"More data than expected in %s\n", file_name);
770 E_INFO(
"Read %d x %d x %d mixture weights\n", n_sen, n_feat, n_comp);
779 char const *sendump_path;
782 s = ckd_calloc(1,
sizeof(*s));
785 s->lmath = logmath_retain(acmod->
lmath);
788 if (s->lmath_8b == NULL)
791 if (logmath_get_width(s->lmath_8b) != 1) {
792 E_ERROR(
"Log base %f is too small to represent add table in 8 bits\n",
793 logmath_get_base(s->lmath_8b));
799 cmd_ln_str_r(s->
config,
"_var"),
800 cmd_ln_float32_r(s->
config,
"-varfloor"),
801 s->lmath)) == NULL) {
802 E_ERROR(
"Failed to read means and variances\n");
809 E_INFO(
"Number of codebooks exceeds 256: %d\n", s->
g->
n_mgau);
812 if (s->
g->
n_mgau != bin_mdef_n_ciphone(mdef)) {
813 E_INFO(
"Number of codebooks doesn't match number of ciphones, doesn't look like PTM: %d != %d\n", s->
g->
n_mgau, bin_mdef_n_ciphone(mdef));
817 if (s->
g->
n_feat != feat_dimension1(acmod->
fcb)) {
818 E_ERROR(
"Number of streams does not match: %d != %d\n",
822 for (i = 0; i < s->
g->
n_feat; ++i) {
823 if (s->
g->
featlen[i] != feat_dimension2(acmod->
fcb, i)) {
824 E_ERROR(
"Dimension of stream %d does not match: %d != %d\n",
830 if ((sendump_path = cmd_ln_str_r(s->
config,
"_sendump"))) {
831 if (read_sendump(s, acmod->
mdef, sendump_path) < 0) {
836 if (read_mixw(s, cmd_ln_str_r(s->
config,
"_mixw"),
837 cmd_ln_float32_r(s->
config,
"-mixwfloor")) < 0) {
841 s->ds_ratio = cmd_ln_int32_r(s->
config,
"-ds");
842 s->max_topn = cmd_ln_int32_r(s->
config,
"-topn");
843 E_INFO(
"Maximum top-N: %d\n", s->max_topn);
848 for (i = 0; i < s->
n_sen; ++i)
849 s->
sen2cb[i] = bin_mdef_sen2cimap(acmod->
mdef, i);
864 for (j = 0; j < s->
g->
n_mgau; ++j) {
865 for (k = 0; k < s->
g->
n_feat; ++k) {
866 for (m = 0; m < s->max_topn; ++m) {
880 ps->
vt = &ptm_mgau_funcs;
883 ptm_mgau_free(ps_mgau_base(s));
901 logmath_free(s->lmath);
902 logmath_free(s->lmath_8b);
903 if (s->sendump_mmap) {
904 ckd_free_2d(s->
mixw);
905 mmio_file_unmap(s->sendump_mmap);
908 ckd_free_3d(s->
mixw);
#define WORST_SCORE
Large "bad" score.
#define SENSCR_SHIFT
Shift count for senone scores.
void gauden_free(gauden_t *g)
Release memory allocated by gauden_init.
int32 gauden_mllr_transform(gauden_t *s, ps_mllr_t *mllr, cmd_ln_t *config)
Transform Gaussians according to an MLLR matrix (or, eventually, more).
gauden_t * gauden_init(char const *meanfile, char const *varfile, float32 varfloor, logmath_t *lmath)
Read mixture gaussian codebooks from the given files.
Fast phonetically-tied mixture evaluation.
int ptm_mgau_frame_eval(ps_mgau_t *s, int16 *senone_scores, uint8 *senone_active, int32 n_senone_active, mfcc_t **featbuf, int32 frame, int32 compallsen)
Compute senone scores for the active senones.
Acoustic model structure.
bin_mdef_t * mdef
Model definition.
cmd_ln_t * config
Configuration.
feat_t * fcb
Dynamic feature computation.
logmath_t * lmath
Log-math computation.
mfcc_t **** var
like mean; diagonal covariance vector only
mfcc_t *** det
log(determinant) for each variance vector; actually, log(sqrt(2*pi*det))
int32 n_feat
Number of feature streams in each codebook.
mfcc_t **** mean
mean[codebook][feature][codeword] vector
int32 n_density
Number of Gaussian densities in each codebook-feature stream.
int32 * featlen
feature length for each feature
int32 n_mgau
Number of codebooks.
ps_mgaufuncs_t * vt
vtable of mgau functions.
Feature space linear transform structure.
ptm_topn_t *** topn
Top-N for each codebook (mgau x feature x topn)
bitvec_t * mgau_active
Set of active codebooks.
uint8 * sen2cb
Senone to codebook mapping.
ptm_fast_eval_t * hist
Fast evaluation info for past frames.
cmd_ln_t * config
Configuration parameters.
ptm_fast_eval_t * f
Fast eval info for current frame.
int32 n_sen
Number of senones.
int n_fast_hist
Number of past frames tracked.
gauden_t * g
Set of Gaussians.
uint8 *** mixw
Mixture weight distributions by feature, codeword, senone.
Common code shared between SC and PTM (tied-state) models.
#define GMMSUB(a, b)
Subtract GMM component b (assumed to be positive) and saturate.
LOGMATH_INLINE int fast_logmath_add(logmath_t *lmath, int mlx, int mly)
Quickly log-add two negated log probabilities.
#define MAX_NEG_ASCR
Maximum negated acoustic score value.
#define MAX_NEG_MIXW
Maximum negated mixture weight value.