PocketSphinx 5prealpha
dict.h
Go to the documentation of this file.
1/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2/* ====================================================================
3 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4 * reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * This work was supported in part by funding from the Defense Advanced
19 * Research Projects Agency and the National Science Foundation of the
20 * United States of America, and the CMU Sphinx Speech Consortium.
21 *
22 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 * ====================================================================
35 *
36 */
37
38#ifndef _S3_DICT_H_
39#define _S3_DICT_H_
40
45/* SphinxBase headers. */
46#include <sphinxbase/hash_table.h>
47
48/* Local headers. */
49#include "s3types.h"
50#include "bin_mdef.h"
51#include "pocketsphinx_export.h"
52
/**
 * Dictionary sizing constant.
 * NOTE(review): presumably the number of entries by which the word array
 * grows when it fills up -- confirm against its use in dict.c.
 */
#define S3DICT_INC_SZ 4096
54
55#ifdef __cplusplus
56extern "C" {
57#endif
58
63typedef struct {
64 char *word;
66 int32 pronlen;
67 s3wid_t alt;
68 s3wid_t basewid;
70
76typedef struct {
77 int refcnt;
80 hash_table_t *ht;
81 int32 max_words;
82 int32 n_word;
84 int32 filler_end;
85 s3wid_t startwid;
86 s3wid_t finishwid;
87 s3wid_t silwid;
88 int nocase;
89} dict_t;
90
91
103dict_t *dict_init(cmd_ln_t *config,
104 bin_mdef_t *mdef
105 );
106
110int dict_write(dict_t *dict, char const *filename, char const *format);
111
113POCKETSPHINX_EXPORT
114s3wid_t dict_wordid(dict_t *d, const char *word);
115
121 s3wid_t w
122 );
123
127POCKETSPHINX_EXPORT
128int dict_real_word(dict_t *d,
129 s3wid_t w
130 );
131
136s3wid_t dict_add_word(dict_t *d,
137 char const *word,
138 s3cipid_t const *p,
139 int32 np
140 );
141
145const char *dict_ciphone_str(dict_t *d,
146 s3wid_t wid,
147 int32 pos
148 );
149
/*
 * Accessor macros over dict_t / dictword_t.
 *
 * These are plain macros: `d` and `w` may be evaluated more than once
 * (e.g. in dict_last_phone), so do not pass expressions with side effects.
 * None of them bounds-check `w` or `p`, except that dict_wordstr() maps a
 * negative word id to NULL.
 */

/** Number of occupied entries in the dictionary. */
#define dict_size(d)            ((d)->n_word)
/** filler_end minus filler_start.
 *  NOTE(review): if filler_end really is the *last* filler id (inclusive),
 *  this is one less than the filler count -- confirm against dict.c. */
#define dict_num_fillers(d)     (dict_filler_end(d) - dict_filler_start(d))
/** Dictionary size minus dict_num_fillers() minus 2.
 *  NOTE(review): the 2 presumably accounts for the start and finish words
 *  -- confirm. */
#define dict_num_real_words(d) \
    (dict_size(d) - dict_num_fillers(d) - 2)
/** Base pronunciation id of word w. */
#define dict_basewid(d,w)       ((d)->word[w].basewid)
/** Word string of w, or NULL when w is negative. */
#define dict_wordstr(d,w)       ((w) < 0 ? NULL : (d)->word[w].word)
/** Word string of the base pronunciation of w. */
#define dict_basestr(d,w)       ((d)->word[dict_basewid(d,w)].word)
/** Next alternative pronunciation id of w. */
#define dict_nextalt(d,w)       ((d)->word[w].alt)
/** Pronunciation length of w. */
#define dict_pronlen(d,w)       ((d)->word[w].pronlen)
/** CI phone id at position p in the pronunciation of w. */
#define dict_pron(d,w,p)        ((d)->word[w].ciphone[p])
/** First filler word id. */
#define dict_filler_start(d)    ((d)->filler_start)
/** Last filler word id. */
#define dict_filler_end(d)      ((d)->filler_end)
/** Value of the dictionary's startwid member. */
#define dict_startwid(d)        ((d)->startwid)
/** Value of the dictionary's finishwid member. */
#define dict_finishwid(d)       ((d)->finishwid)
/** Value of the dictionary's silwid member. */
#define dict_silwid(d)          ((d)->silwid)
/** Non-zero when the pronunciation of w is exactly one phone long. */
#define dict_is_single_phone(d,w)       ((d)->word[w].pronlen == 1)
/** First phone of w (requires pronlen >= 1). */
#define dict_first_phone(d,w)   ((d)->word[w].ciphone[0])
/** Second phone of w (requires pronlen >= 2). */
#define dict_second_phone(d,w)  ((d)->word[w].ciphone[1])
/** Second-to-last phone of w (requires pronlen >= 2). */
#define dict_second_last_phone(d,w)     ((d)->word[w].ciphone[(d)->word[w].pronlen - 2])
/** Last phone of w (requires pronlen >= 1). */
#define dict_last_phone(d,w)    ((d)->word[w].ciphone[(d)->word[w].pronlen - 1])
176
/* Hard-coded special words */
#define S3_START_WORD           "<s>"    /* start-word string */
#define S3_FINISH_WORD          "</s>"   /* finish-word string */
#define S3_SILENCE_WORD         "<sil>"  /* silence-word string */
#define S3_UNKNOWN_WORD         "<UNK>"  /* unknown-word string */
182
190int32 dict_word2basestr(char *word);
191
196
200int dict_free(dict_t *d);
201
203void dict_report(dict_t *d
204 );
205
206#ifdef __cplusplus
207}
208#endif
209
210#endif
Binary format model definition files, with support for heterogeneous topologies and variable-size N-p...
s3wid_t dict_add_word(dict_t *d, char const *word, s3cipid_t const *p, int32 np)
Add a word with the given ciphone pronunciation list to the dictionary.
Definition dict.c:80
int32 dict_word2basestr(char *word)
If the given word contains a trailing "(....)" (i.e., a Sphinx-II style alternative pronunciation spe...
Definition dict.c:442
void dict_report(dict_t *d)
Report a dictionary structure.
Definition dict.c:499
dict_t * dict_retain(dict_t *d)
Retain a pointer to a dict_t.
Definition dict.c:461
int dict_write(dict_t *dict, char const *filename, char const *format)
Write dictionary to a file.
Definition dict.c:221
int dict_free(dict_t *d)
Release a pointer to a dictionary.
Definition dict.c:468
int dict_filler_word(dict_t *d, s3wid_t w)
Return 1 if w is a filler word, 0 if not.
Definition dict.c:413
const char * dict_ciphone_str(dict_t *d, s3wid_t wid, int32 pos)
Return value: CI phone string for the given word, phone position.
Definition dict.c:69
dict_t * dict_init(cmd_ln_t *config, bin_mdef_t *mdef)
Initialize a new dictionary.
Definition dict.c:252
POCKETSPHINX_EXPORT int dict_real_word(dict_t *d, s3wid_t w)
Test if w is a "real" word, i.e.
Definition dict.c:427
POCKETSPHINX_EXPORT s3wid_t dict_wordid(dict_t *d, const char *word)
Return word id for given word string if present.
Definition dict.c:399
Size definitions of semantically meaningful units.
int16 s3cipid_t
Size definitions for more semantically meaningful units.
Definition s3types.h:63
a structure for a dictionary.
Definition dict.h:76
s3wid_t startwid
FOR INTERNAL-USE ONLY.
Definition dict.h:85
s3wid_t finishwid
FOR INTERNAL-USE ONLY.
Definition dict.h:86
bin_mdef_t * mdef
Model definition used for phone IDs; NULL if none used.
Definition dict.h:78
hash_table_t * ht
Hash table for mapping word strings to word ids.
Definition dict.h:80
int32 filler_end
Last filler word id (read from filler dict)
Definition dict.h:84
dictword_t * word
Array of entries in dictionary.
Definition dict.h:79
s3wid_t silwid
FOR INTERNAL-USE ONLY.
Definition dict.h:87
int32 filler_start
First filler word id (read from filler dict)
Definition dict.h:83
int32 n_word
#Occupied entries in dict; ie, excluding empty slots
Definition dict.h:82
int32 max_words
#Entries allocated in dict, including empty slots
Definition dict.h:81
a structure for one dictionary word.
Definition dict.h:63
char * word
Ascii word string.
Definition dict.h:64
int32 pronlen
Pronunciation length.
Definition dict.h:66
s3wid_t basewid
Base pronunciation id.
Definition dict.h:68
s3wid_t alt
Next alternative pronunciation id, NOT_S3WID if none.
Definition dict.h:67
s3cipid_t * ciphone
Pronunciation.
Definition dict.h:65