SphinxBase 0.6
|
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 00002 /* ==================================================================== 00003 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights 00004 * reserved. 00005 * 00006 * Redistribution and use in source and binary forms, with or without 00007 * modification, are permitted provided that the following conditions 00008 * are met: 00009 * 00010 * 1. Redistributions of source code must retain the above copyright 00011 * notice, this list of conditions and the following disclaimer. 00012 * 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in 00015 * the documentation and/or other materials provided with the 00016 * distribution. 00017 * 00018 * 00019 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 00020 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00021 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00022 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 00023 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00024 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00025 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00026 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00027 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00028 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00029 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00030 * 00031 * ==================================================================== 00032 * 00033 */ 00034 00035 /* System headers. */ 00036 #ifdef _WIN32_WCE 00037 /*MC in a debug build it's implicitly included by assert.h 00038 but you need this in a release build */ 00039 #include <windows.h> 00040 #else 00041 #include <time.h> 00042 #endif /* _WIN32_WCE */ 00043 #include <stdio.h> 00044 #include <string.h> 00045 #include <assert.h> 00046 00047 /* SphinxBase headers. */ 00048 #include "sphinxbase/err.h" 00049 #include "sphinxbase/pio.h" 00050 #include "sphinxbase/ckd_alloc.h" 00051 #include "sphinxbase/prim_type.h" 00052 #include "sphinxbase/strfuncs.h" 00053 #include "sphinxbase/hash_table.h" 00054 #include "sphinxbase/fsg_model.h" 00055 00063 struct trans_list_s { 00064 hash_table_t *null_trans; /* Null transitions keyed by state. */ 00065 hash_table_t *trans; /* Lists of non-null transitions keyed by state. */ 00066 }; 00067 00071 struct fsg_arciter_s { 00072 hash_iter_t *itor, *null_itor; 00073 gnode_t *gn; 00074 }; 00075 00076 #define FSG_MODEL_BEGIN_DECL "FSG_BEGIN" 00077 #define FSG_MODEL_END_DECL "FSG_END" 00078 #define FSG_MODEL_N_DECL "N" 00079 #define FSG_MODEL_NUM_STATES_DECL "NUM_STATES" 00080 #define FSG_MODEL_S_DECL "S" 00081 #define FSG_MODEL_START_STATE_DECL "START_STATE" 00082 #define FSG_MODEL_F_DECL "F" 00083 #define FSG_MODEL_FINAL_STATE_DECL "FINAL_STATE" 00084 #define FSG_MODEL_T_DECL "T" 00085 #define FSG_MODEL_TRANSITION_DECL "TRANSITION" 00086 #define FSG_MODEL_COMMENT_CHAR '#' 00087 00088 00089 static int32 00090 nextline_str2words(FILE * fp, int32 * lineno, 00091 char **lineptr, char ***wordptr) 00092 { 00093 for (;;) { 00094 size_t len; 00095 int32 n; 00096 00097 ckd_free(*lineptr); 00098 if ((*lineptr = fread_line(fp, &len)) == NULL) 00099 return -1; 00100 00101 (*lineno)++; 00102 00103 if ((*lineptr)[0] == FSG_MODEL_COMMENT_CHAR) 00104 continue; /* Skip comment lines */ 00105 00106 n = str2words(*lineptr, NULL, 0); 00107 if (n == 0) 00108 continue; /* Skip blank lines */ 00109 00110 /* Abuse of realloc(), but this doesn't have to be fast. */ 00111 if (*wordptr == NULL) 00112 *wordptr = ckd_calloc(n, sizeof(**wordptr)); 00113 else 00114 *wordptr = ckd_realloc(*wordptr, n * sizeof(**wordptr)); 00115 return str2words(*lineptr, *wordptr, n); 00116 } 00117 } 00118 00119 void 00120 fsg_model_trans_add(fsg_model_t * fsg, 00121 int32 from, int32 to, int32 logp, int32 wid) 00122 { 00123 fsg_link_t *link; 00124 glist_t gl; 00125 gnode_t *gn; 00126 00127 if (fsg->trans[from].trans == NULL) 00128 fsg->trans[from].trans = hash_table_new(5, HASH_CASE_YES); 00129 00130 /* Check for duplicate link (i.e., link already exists with label=wid) */ 00131 for (gn = gl = fsg_model_trans(fsg, from, to); gn; gn = gnode_next(gn)) { 00132 link = (fsg_link_t *) gnode_ptr(gn); 00133 if (link->wid == wid) { 00134 if (link->logs2prob < logp) 00135 link->logs2prob = logp; 00136 return; 00137 } 00138 } 00139 00140 /* Create transition object */ 00141 link = listelem_malloc(fsg->link_alloc); 00142 link->from_state = from; 00143 link->to_state = to; 00144 link->logs2prob = logp; 00145 link->wid = wid; 00146 00147 /* Add it to the list of transitions and update the hash table */ 00148 gl = glist_add_ptr(gl, (void *)link); 00149 hash_table_replace_bkey(fsg->trans[from].trans, 00150 (char const *)&link->to_state, 00151 sizeof(link->to_state), gl); 00152 } 00153 00154 int32 00155 fsg_model_tag_trans_add(fsg_model_t * fsg, int32 from, int32 to, int32 logp, int32 wid) 00156 { 00157 fsg_link_t *link, *link2; 00158 00159 /* Check for transition probability */ 00160 if (logp > 0) { 00161 E_FATAL("Null transition prob must be <= 1.0 (state %d -> %d)\n", 00162 from, to); 00163 } 00164 00165 /* Self-loop null transitions (with prob <= 1.0) are redundant */ 00166 if (from == to) 00167 return -1; 00168 00169 if (fsg->trans[from].null_trans == NULL) 00170 fsg->trans[from].null_trans = hash_table_new(5, HASH_CASE_YES); 00171 00172 /* Check for a duplicate link; if found, keep the higher prob */ 00173 link = fsg_model_null_trans(fsg, from, to); 00174 if (link) { 00175 if (link->logs2prob < logp) { 00176 link->logs2prob = logp; 00177 return 0; 00178 } 00179 else 00180 return -1; 00181 } 00182 00183 /* Create null transition object */ 00184 link = listelem_malloc(fsg->link_alloc); 00185 link->from_state = from; 00186 link->to_state = to; 00187 link->logs2prob = logp; 00188 link->wid = -1; 00189 00190 link2 = (fsg_link_t *) 00191 hash_table_enter_bkey(fsg->trans[from].null_trans, 00192 (char const *)&link->to_state, 00193 sizeof(link->to_state), link); 00194 assert(link == link2); 00195 00196 return 1; 00197 } 00198 00199 int32 00200 fsg_model_null_trans_add(fsg_model_t * fsg, int32 from, int32 to, int32 logp) 00201 { 00202 return fsg_model_tag_trans_add(fsg, from, to, logp, -1); 00203 } 00204 00205 glist_t 00206 fsg_model_null_trans_closure(fsg_model_t * fsg, glist_t nulls) 00207 { 00208 gnode_t *gn1, *gn2; 00209 int updated; 00210 fsg_link_t *tl1, *tl2; 00211 int32 k, n; 00212 00213 E_INFO("Computing transitive closure for null transitions\n"); 00214 00215 if (nulls == NULL) { 00216 fsg_link_t *null; 00217 int i, j; 00218 00219 for (i = 0; i < fsg->n_state; ++i) { 00220 for (j = 0; j < fsg->n_state; ++j) { 00221 if ((null = fsg_model_null_trans(fsg, i, j))) 00222 nulls = glist_add_ptr(nulls, null); 00223 } 00224 } 00225 } 00226 00227 /* 00228 * Probably not the most efficient closure implementation, in general, but 00229 * probably reasonably efficient for a sparse null transition matrix. 00230 */ 00231 n = 0; 00232 do { 00233 updated = FALSE; 00234 00235 for (gn1 = nulls; gn1; gn1 = gnode_next(gn1)) { 00236 tl1 = (fsg_link_t *) gnode_ptr(gn1); 00237 assert(tl1->wid < 0); 00238 00239 for (gn2 = nulls; gn2; gn2 = gnode_next(gn2)) { 00240 tl2 = (fsg_link_t *) gnode_ptr(gn2); 00241 00242 if (tl1->to_state == tl2->from_state) { 00243 k = fsg_model_null_trans_add(fsg, 00244 tl1->from_state, 00245 tl2->to_state, 00246 tl1->logs2prob + 00247 tl2->logs2prob); 00248 if (k >= 0) { 00249 updated = TRUE; 00250 if (k > 0) { 00251 nulls = 00252 glist_add_ptr(nulls, (void *) 00253 fsg_model_null_trans 00254 (fsg, tl1->from_state, 00255 tl2->to_state)); 00256 n++; 00257 } 00258 } 00259 } 00260 } 00261 } 00262 } while (updated); 00263 00264 E_INFO("%d null transitions added\n", n); 00265 00266 return nulls; 00267 } 00268 00269 glist_t 00270 fsg_model_trans(fsg_model_t *fsg, int32 i, int32 j) 00271 { 00272 void *val; 00273 00274 if (fsg->trans[i].trans == NULL) 00275 return NULL; 00276 if (hash_table_lookup_bkey(fsg->trans[i].trans, (char const *)&j, 00277 sizeof(j), &val) < 0) 00278 return NULL; 00279 return (glist_t)val; 00280 } 00281 00282 fsg_link_t * 00283 fsg_model_null_trans(fsg_model_t *fsg, int32 i, int32 j) 00284 { 00285 void *val; 00286 00287 if (fsg->trans[i].null_trans == NULL) 00288 return NULL; 00289 if (hash_table_lookup_bkey(fsg->trans[i].null_trans, (char const *)&j, 00290 sizeof(j), &val) < 0) 00291 return NULL; 00292 return (fsg_link_t *)val; 00293 } 00294 00295 fsg_arciter_t * 00296 fsg_model_arcs(fsg_model_t *fsg, int32 i) 00297 { 00298 fsg_arciter_t *itor; 00299 00300 if (fsg->trans[i].trans == NULL && fsg->trans[i].null_trans == NULL) 00301 return NULL; 00302 itor = ckd_calloc(1, sizeof(*itor)); 00303 if (fsg->trans[i].null_trans) 00304 itor->null_itor = hash_table_iter(fsg->trans[i].null_trans); 00305 if (fsg->trans[i].trans) 00306 itor->itor = hash_table_iter(fsg->trans[i].trans); 00307 if (itor->itor != NULL) 00308 itor->gn = hash_entry_val(itor->itor->ent); 00309 return itor; 00310 } 00311 00312 fsg_link_t * 00313 fsg_arciter_get(fsg_arciter_t *itor) 00314 { 00315 /* Iterate over non-null arcs first. */ 00316 if (itor->gn) 00317 return (fsg_link_t *)gnode_ptr(itor->gn); 00318 else if (itor->null_itor) 00319 return (fsg_link_t *)hash_entry_val(itor->null_itor->ent); 00320 else 00321 return NULL; 00322 } 00323 00324 fsg_arciter_t * 00325 fsg_arciter_next(fsg_arciter_t *itor) 00326 { 00327 /* Iterate over non-null arcs first. */ 00328 if (itor->gn) { 00329 itor->gn = gnode_next(itor->gn); 00330 /* Move to the next destination arc. */ 00331 if (itor->gn == NULL) { 00332 itor->itor = hash_table_iter_next(itor->itor); 00333 if (itor->itor != NULL) 00334 itor->gn = hash_entry_val(itor->itor->ent); 00335 else if (itor->null_itor == NULL) 00336 goto stop_iteration; 00337 } 00338 } 00339 else { 00340 if (itor->null_itor == NULL) 00341 goto stop_iteration; 00342 itor->null_itor = hash_table_iter_next(itor->null_itor); 00343 if (itor->null_itor == NULL) 00344 goto stop_iteration; 00345 } 00346 return itor; 00347 stop_iteration: 00348 fsg_arciter_free(itor); 00349 return NULL; 00350 00351 } 00352 00353 void 00354 fsg_arciter_free(fsg_arciter_t *itor) 00355 { 00356 if (itor == NULL) 00357 return; 00358 hash_table_iter_free(itor->null_itor); 00359 hash_table_iter_free(itor->itor); 00360 ckd_free(itor); 00361 } 00362 00363 int 00364 fsg_model_word_id(fsg_model_t *fsg, char const *word) 00365 { 00366 int wid; 00367 00368 /* Search for an existing word matching this. */ 00369 for (wid = 0; wid < fsg->n_word; ++wid) { 00370 if (0 == strcmp(fsg->vocab[wid], word)) 00371 break; 00372 } 00373 /* If not found, add this to the vocab. */ 00374 if (wid == fsg->n_word) 00375 return -1; 00376 return wid; 00377 } 00378 00379 int 00380 fsg_model_word_add(fsg_model_t *fsg, char const *word) 00381 { 00382 int wid; 00383 00384 /* Search for an existing word matching this. */ 00385 wid = fsg_model_word_id(fsg, word); 00386 /* If not found, add this to the vocab. */ 00387 if (wid == -1) { 00388 wid = fsg->n_word; 00389 if (fsg->n_word == fsg->n_word_alloc) { 00390 fsg->n_word_alloc += 10; 00391 fsg->vocab = ckd_realloc(fsg->vocab, 00392 fsg->n_word_alloc * sizeof(*fsg->vocab)); 00393 if (fsg->silwords) 00394 fsg->silwords = bitvec_realloc(fsg->silwords, fsg->n_word_alloc); 00395 if (fsg->altwords) 00396 fsg->altwords = bitvec_realloc(fsg->altwords, fsg->n_word_alloc); 00397 } 00398 ++fsg->n_word; 00399 fsg->vocab[wid] = ckd_salloc(word); 00400 } 00401 return wid; 00402 } 00403 00404 int 00405 fsg_model_add_silence(fsg_model_t * fsg, char const *silword, 00406 int state, float32 silprob) 00407 { 00408 int32 logsilp; 00409 int n_trans, silwid, src; 00410 00411 E_INFO("Adding silence transitions for %s to FSG\n", silword); 00412 00413 silwid = fsg_model_word_add(fsg, silword); 00414 logsilp = (int32) (logmath_log(fsg->lmath, silprob) * fsg->lw); 00415 if (fsg->silwords == NULL) 00416 fsg->silwords = bitvec_alloc(fsg->n_word_alloc); 00417 bitvec_set(fsg->silwords, silwid); 00418 00419 n_trans = 0; 00420 if (state == -1) { 00421 for (src = 0; src < fsg->n_state; src++) { 00422 fsg_model_trans_add(fsg, src, src, logsilp, silwid); 00423 ++n_trans; 00424 } 00425 } 00426 else { 00427 fsg_model_trans_add(fsg, state, state, logsilp, silwid); 00428 ++n_trans; 00429 } 00430 00431 E_INFO("Added %d silence word transitions\n", n_trans); 00432 return n_trans; 00433 } 00434 00435 int 00436 fsg_model_add_alt(fsg_model_t * fsg, char const *baseword, 00437 char const *altword) 00438 { 00439 int i, basewid, altwid; 00440 int ntrans; 00441 00442 /* FIXME: This will get slow, eventually... */ 00443 for (basewid = 0; basewid < fsg->n_word; ++basewid) 00444 if (0 == strcmp(fsg->vocab[basewid], baseword)) 00445 break; 00446 if (basewid == fsg->n_word) { 00447 E_ERROR("Base word %s not present in FSG vocabulary!\n", baseword); 00448 return -1; 00449 } 00450 altwid = fsg_model_word_add(fsg, altword); 00451 if (fsg->altwords == NULL) 00452 fsg->altwords = bitvec_alloc(fsg->n_word_alloc); 00453 bitvec_set(fsg->altwords, altwid); 00454 00455 E_DEBUG(2,("Adding alternate word transitions (%s,%s) to FSG\n", 00456 baseword, altword)); 00457 00458 /* Look for all transitions involving baseword and duplicate them. */ 00459 /* FIXME: This will also get slow, eventually... */ 00460 ntrans = 0; 00461 for (i = 0; i < fsg->n_state; ++i) { 00462 hash_iter_t *itor; 00463 if (fsg->trans[i].trans == NULL) 00464 continue; 00465 for (itor = hash_table_iter(fsg->trans[i].trans); itor; 00466 itor = hash_table_iter_next(itor)) { 00467 glist_t trans; 00468 gnode_t *gn; 00469 00470 trans = hash_entry_val(itor->ent); 00471 for (gn = trans; gn; gn = gnode_next(gn)) { 00472 fsg_link_t *fl = gnode_ptr(gn); 00473 if (fl->wid == basewid) { 00474 fsg_link_t *link; 00475 00476 /* Create transition object */ 00477 link = listelem_malloc(fsg->link_alloc); 00478 link->from_state = fl->from_state; 00479 link->to_state = fl->to_state; 00480 link->logs2prob = fl->logs2prob; /* FIXME!!!??? */ 00481 link->wid = altwid; 00482 00483 trans = 00484 glist_add_ptr(trans, (void *) link); 00485 ++ntrans; 00486 } 00487 } 00488 hash_entry_val(itor->ent) = trans; 00489 } 00490 } 00491 00492 E_DEBUG(2,("Added %d alternate word transitions\n", ntrans)); 00493 return ntrans; 00494 } 00495 00496 00497 fsg_model_t * 00498 fsg_model_init(char const *name, logmath_t *lmath, float32 lw, int32 n_state) 00499 { 00500 fsg_model_t *fsg; 00501 00502 /* Allocate basic stuff. */ 00503 fsg = ckd_calloc(1, sizeof(*fsg)); 00504 fsg->refcount = 1; 00505 fsg->link_alloc = listelem_alloc_init(sizeof(fsg_link_t)); 00506 fsg->lmath = lmath; 00507 fsg->name = name ? ckd_salloc(name) : NULL; 00508 fsg->n_state = n_state; 00509 fsg->lw = lw; 00510 00511 fsg->trans = ckd_calloc(fsg->n_state, sizeof(*fsg->trans)); 00512 00513 return fsg; 00514 } 00515 00516 fsg_model_t * 00517 fsg_model_read(FILE * fp, logmath_t *lmath, float32 lw) 00518 { 00519 fsg_model_t *fsg; 00520 hash_table_t *vocab; 00521 hash_iter_t *itor; 00522 int32 lastwid; 00523 char **wordptr; 00524 char *lineptr; 00525 char *fsgname; 00526 int32 lineno; 00527 int32 n, i, j; 00528 int n_state, n_trans, n_null_trans; 00529 glist_t nulls; 00530 float32 p; 00531 00532 lineno = 0; 00533 vocab = hash_table_new(32, FALSE); 00534 wordptr = NULL; 00535 lineptr = NULL; 00536 nulls = NULL; 00537 fsgname = NULL; 00538 fsg = NULL; 00539 00540 /* Scan upto FSG_BEGIN header */ 00541 for (;;) { 00542 n = nextline_str2words(fp, &lineno, &lineptr, &wordptr); 00543 if (n < 0) { 00544 E_ERROR("%s declaration missing\n", FSG_MODEL_BEGIN_DECL); 00545 goto parse_error; 00546 } 00547 00548 if ((strcmp(wordptr[0], FSG_MODEL_BEGIN_DECL) == 0)) { 00549 if (n > 2) { 00550 E_ERROR("Line[%d]: malformed FSG_BEGIN declaration\n", 00551 lineno); 00552 goto parse_error; 00553 } 00554 break; 00555 } 00556 } 00557 /* Save FSG name, or it will get clobbered below :(. 00558 * If name is missing, try the default. 00559 */ 00560 if (n == 2) { 00561 fsgname = ckd_salloc(wordptr[1]); 00562 } else { 00563 E_WARN ("FSG name is missing\n"); 00564 fsgname = ckd_salloc("unknown"); 00565 } 00566 00567 /* Read #states */ 00568 n = nextline_str2words(fp, &lineno, &lineptr, &wordptr); 00569 if ((n != 2) 00570 || ((strcmp(wordptr[0], FSG_MODEL_N_DECL) != 0) 00571 && (strcmp(wordptr[0], FSG_MODEL_NUM_STATES_DECL) != 0)) 00572 || (sscanf(wordptr[1], "%d", &n_state) != 1) 00573 || (n_state <= 0)) { 00574 E_ERROR 00575 ("Line[%d]: #states declaration line missing or malformed\n", 00576 lineno); 00577 goto parse_error; 00578 } 00579 00580 /* Now create the FSG. */ 00581 fsg = fsg_model_init(fsgname, lmath, lw, n_state); 00582 ckd_free(fsgname); 00583 fsgname = NULL; 00584 00585 /* Read start state */ 00586 n = nextline_str2words(fp, &lineno, &lineptr, &wordptr); 00587 if ((n != 2) 00588 || ((strcmp(wordptr[0], FSG_MODEL_S_DECL) != 0) 00589 && (strcmp(wordptr[0], FSG_MODEL_START_STATE_DECL) != 0)) 00590 || (sscanf(wordptr[1], "%d", &(fsg->start_state)) != 1) 00591 || (fsg->start_state < 0) 00592 || (fsg->start_state >= fsg->n_state)) { 00593 E_ERROR 00594 ("Line[%d]: start state declaration line missing or malformed\n", 00595 lineno); 00596 goto parse_error; 00597 } 00598 00599 /* Read final state */ 00600 n = nextline_str2words(fp, &lineno, &lineptr, &wordptr); 00601 if ((n != 2) 00602 || ((strcmp(wordptr[0], FSG_MODEL_F_DECL) != 0) 00603 && (strcmp(wordptr[0], FSG_MODEL_FINAL_STATE_DECL) != 0)) 00604 || (sscanf(wordptr[1], "%d", &(fsg->final_state)) != 1) 00605 || (fsg->final_state < 0) 00606 || (fsg->final_state >= fsg->n_state)) { 00607 E_ERROR 00608 ("Line[%d]: final state declaration line missing or malformed\n", 00609 lineno); 00610 goto parse_error; 00611 } 00612 00613 /* Read transitions */ 00614 lastwid = 0; 00615 n_trans = n_null_trans = 0; 00616 for (;;) { 00617 int32 wid, tprob; 00618 00619 n = nextline_str2words(fp, &lineno, &lineptr, &wordptr); 00620 if (n <= 0) { 00621 E_ERROR("Line[%d]: transition or FSG_END statement expected\n", 00622 lineno); 00623 goto parse_error; 00624 } 00625 00626 if ((strcmp(wordptr[0], FSG_MODEL_END_DECL) == 0)) { 00627 break; 00628 } 00629 00630 if ((strcmp(wordptr[0], FSG_MODEL_T_DECL) == 0) 00631 || (strcmp(wordptr[0], FSG_MODEL_TRANSITION_DECL) == 0)) { 00632 00633 00634 if (((n != 4) && (n != 5)) 00635 || (sscanf(wordptr[1], "%d", &i) != 1) 00636 || (sscanf(wordptr[2], "%d", &j) != 1) 00637 || (i < 0) || (i >= fsg->n_state) 00638 || (j < 0) || (j >= fsg->n_state)) { 00639 E_ERROR 00640 ("Line[%d]: transition spec malformed; Expecting: from-state to-state trans-prob [word]\n", 00641 lineno); 00642 goto parse_error; 00643 } 00644 00645 p = atof_c(wordptr[3]); 00646 if ((p <= 0.0) || (p > 1.0)) { 00647 E_ERROR 00648 ("Line[%d]: transition spec malformed; Expecting float as transition probability\n", 00649 lineno); 00650 goto parse_error; 00651 } 00652 } 00653 else { 00654 E_ERROR("Line[%d]: transition or FSG_END statement expected\n", 00655 lineno); 00656 goto parse_error; 00657 } 00658 00659 tprob = (int32)(logmath_log(lmath, p) * fsg->lw); 00660 /* Add word to "dictionary". */ 00661 if (n > 4) { 00662 if (hash_table_lookup_int32(vocab, wordptr[4], &wid) < 0) { 00663 (void)hash_table_enter_int32(vocab, ckd_salloc(wordptr[4]), lastwid); 00664 wid = lastwid; 00665 ++lastwid; 00666 } 00667 fsg_model_trans_add(fsg, i, j, tprob, wid); 00668 ++n_trans; 00669 } 00670 else { 00671 if (fsg_model_null_trans_add(fsg, i, j, tprob) == 1) { 00672 ++n_null_trans; 00673 nulls = glist_add_ptr(nulls, fsg_model_null_trans(fsg, i, j)); 00674 } 00675 } 00676 } 00677 00678 E_INFO("FSG: %d states, %d unique words, %d transitions (%d null)\n", 00679 fsg->n_state, hash_table_inuse(vocab), n_trans, n_null_trans); 00680 00681 /* Do transitive closure on null transitions */ 00682 nulls = fsg_model_null_trans_closure(fsg, nulls); 00683 glist_free(nulls); 00684 00685 /* Now create a string table from the "dictionary" */ 00686 fsg->n_word = hash_table_inuse(vocab); 00687 fsg->n_word_alloc = fsg->n_word + 10; /* Pad it a bit. */ 00688 fsg->vocab = ckd_calloc(fsg->n_word_alloc, sizeof(*fsg->vocab)); 00689 for (itor = hash_table_iter(vocab); itor; itor = hash_table_iter_next(itor)) { 00690 char const *word = hash_entry_key(itor->ent); 00691 int32 wid = (int32)(long)hash_entry_val(itor->ent); 00692 fsg->vocab[wid] = (char *)word; 00693 } 00694 hash_table_free(vocab); 00695 ckd_free(lineptr); 00696 ckd_free(wordptr); 00697 00698 return fsg; 00699 00700 parse_error: 00701 for (itor = hash_table_iter(vocab); itor; itor = hash_table_iter_next(itor)) 00702 ckd_free((char *)hash_entry_key(itor->ent)); 00703 glist_free(nulls); 00704 hash_table_free(vocab); 00705 ckd_free(fsgname); 00706 ckd_free(lineptr); 00707 ckd_free(wordptr); 00708 fsg_model_free(fsg); 00709 return NULL; 00710 } 00711 00712 00713 fsg_model_t * 00714 fsg_model_readfile(const char *file, logmath_t *lmath, float32 lw) 00715 { 00716 FILE *fp; 00717 fsg_model_t *fsg; 00718 00719 if ((fp = fopen(file, "r")) == NULL) { 00720 E_ERROR("Failed to open FSG file '%s' for reading: %s\n", file, strerror(errno)); 00721 return NULL; 00722 } 00723 fsg = fsg_model_read(fp, lmath, lw); 00724 fclose(fp); 00725 return fsg; 00726 } 00727 00728 fsg_model_t * 00729 fsg_model_retain(fsg_model_t *fsg) 00730 { 00731 ++fsg->refcount; 00732 return fsg; 00733 } 00734 00735 static void 00736 trans_list_free(fsg_model_t *fsg, int32 i) 00737 { 00738 hash_iter_t *itor; 00739 00740 /* FIXME (maybe): FSG links will all get freed when we call 00741 * listelem_alloc_free() so don't bother freeing them explicitly 00742 * here. */ 00743 if (fsg->trans[i].trans) { 00744 for (itor = hash_table_iter(fsg->trans[i].trans); 00745 itor; itor = hash_table_iter_next(itor)) { 00746 glist_t gl = (glist_t)hash_entry_val(itor->ent); 00747 glist_free(gl); 00748 } 00749 } 00750 hash_table_free(fsg->trans[i].trans); 00751 hash_table_free(fsg->trans[i].null_trans); 00752 } 00753 00754 int 00755 fsg_model_free(fsg_model_t * fsg) 00756 { 00757 int i; 00758 00759 if (fsg == NULL) 00760 return 0; 00761 00762 if (--fsg->refcount > 0) 00763 return fsg->refcount; 00764 00765 for (i = 0; i < fsg->n_word; ++i) 00766 ckd_free(fsg->vocab[i]); 00767 for (i = 0; i < fsg->n_state; ++i) 00768 trans_list_free(fsg, i); 00769 ckd_free(fsg->trans); 00770 ckd_free(fsg->vocab); 00771 listelem_alloc_free(fsg->link_alloc); 00772 bitvec_free(fsg->silwords); 00773 bitvec_free(fsg->altwords); 00774 ckd_free(fsg->name); 00775 ckd_free(fsg); 00776 return 0; 00777 } 00778 00779 00780 void 00781 fsg_model_write(fsg_model_t * fsg, FILE * fp) 00782 { 00783 int32 i; 00784 00785 fprintf(fp, "%s %s\n", FSG_MODEL_BEGIN_DECL, fsg->name ? fsg->name : ""); 00786 fprintf(fp, "%s %d\n", FSG_MODEL_NUM_STATES_DECL, fsg->n_state); 00787 fprintf(fp, "%s %d\n", FSG_MODEL_START_STATE_DECL, fsg->start_state); 00788 fprintf(fp, "%s %d\n", FSG_MODEL_FINAL_STATE_DECL, fsg->final_state); 00789 00790 for (i = 0; i < fsg->n_state; i++) { 00791 fsg_arciter_t *itor; 00792 00793 for (itor = fsg_model_arcs(fsg, i); itor; itor = fsg_arciter_next(itor)) { 00794 fsg_link_t *tl = fsg_arciter_get(itor); 00795 00796 fprintf(fp, "%s %d %d %f %s\n", FSG_MODEL_TRANSITION_DECL, 00797 tl->from_state, tl->to_state, 00798 logmath_exp(fsg->lmath, (int32)(tl->logs2prob / fsg->lw)), 00799 (tl->wid < 0) ? "" : fsg_model_word_str(fsg, tl->wid)); 00800 } 00801 } 00802 00803 fprintf(fp, "%s\n", FSG_MODEL_END_DECL); 00804 00805 fflush(fp); 00806 } 00807 00808 void 00809 fsg_model_writefile(fsg_model_t *fsg, char const *file) 00810 { 00811 FILE *fp; 00812 00813 assert(fsg); 00814 00815 E_INFO("Writing FSG file '%s'\n", file); 00816 00817 if ((fp = fopen(file, "w")) == NULL) { 00818 E_ERROR("Failed to open FSG file '%s' for reading: %s\n", file, strerror(errno)); 00819 return; 00820 } 00821 00822 fsg_model_write(fsg, fp); 00823 00824 fclose(fp); 00825 } 00826 00827 static void 00828 fsg_model_write_fsm_trans(fsg_model_t *fsg, int i, FILE *fp) 00829 { 00830 fsg_arciter_t *itor; 00831 00832 for (itor = fsg_model_arcs(fsg, i); itor; 00833 itor = fsg_arciter_next(itor)) { 00834 fsg_link_t *tl = fsg_arciter_get(itor); 00835 fprintf(fp, "%d %d %s %f\n", 00836 tl->from_state, tl->to_state, 00837 (tl->wid < 0) ? "<eps>" : fsg_model_word_str(fsg, tl->wid), 00838 -logmath_log_to_ln(fsg->lmath, tl->logs2prob / fsg->lw)); 00839 } 00840 } 00841 00842 void 00843 fsg_model_write_fsm(fsg_model_t * fsg, FILE * fp) 00844 { 00845 int i; 00846 00847 /* Write transitions from initial state first. */ 00848 fsg_model_write_fsm_trans(fsg, fsg_model_start_state(fsg), fp); 00849 00850 /* Other states. */ 00851 for (i = 0; i < fsg->n_state; i++) { 00852 if (i == fsg_model_start_state(fsg)) 00853 continue; 00854 fsg_model_write_fsm_trans(fsg, i, fp); 00855 } 00856 00857 /* Final state. */ 00858 fprintf(fp, "%d 0\n", fsg_model_final_state(fsg)); 00859 00860 fflush(fp); 00861 } 00862 00863 void 00864 fsg_model_writefile_fsm(fsg_model_t *fsg, char const *file) 00865 { 00866 FILE *fp; 00867 00868 assert(fsg); 00869 00870 E_INFO("Writing FSM file '%s'\n", file); 00871 00872 if ((fp = fopen(file, "w")) == NULL) { 00873 E_ERROR("Failed to open fsm file '%s' for writing: %s\n", file, strerror(errno)); 00874 return; 00875 } 00876 00877 fsg_model_write_fsm(fsg, fp); 00878 00879 fclose(fp); 00880 } 00881 00882 void 00883 fsg_model_write_symtab(fsg_model_t *fsg, FILE *file) 00884 { 00885 int i; 00886 00887 fprintf(file, "<eps> 0\n"); 00888 for (i = 0; i < fsg_model_n_word(fsg); ++i) { 00889 fprintf(file, "%s %d\n", fsg_model_word_str(fsg, i), i + 1); 00890 } 00891 fflush(file); 00892 } 00893 00894 void 00895 fsg_model_writefile_symtab(fsg_model_t *fsg, char const *file) 00896 { 00897 FILE *fp; 00898 00899 assert(fsg); 00900 00901 E_INFO("Writing FSM symbol table '%s'\n", file); 00902 00903 if ((fp = fopen(file, "w")) == NULL) { 00904 E_ERROR("Failed to open symbol table '%s' for writing: %s\n", file, strerror(errno)); 00905 return; 00906 } 00907 00908 fsg_model_write_symtab(fsg, fp); 00909 00910 fclose(fp); 00911 }