00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038 #include <string.h>
00039 #include <assert.h>
00040
00041 #include "ckd_alloc.h"
00042 #include "strfuncs.h"
00043 #include "hash_table.h"
00044 #include "err.h"
00045
00046 #include "jsgf_internal.h"
00047 #include "jsgf_parser.h"
00048 #include "jsgf_scanner.h"
00049
00050 int yyparse (yyscan_t yyscanner, jsgf_t *jsgf);
00051
00059 jsgf_atom_t *
00060 jsgf_atom_new(char *name, float weight)
00061 {
00062 jsgf_atom_t *atom;
00063
00064 atom = ckd_calloc(1, sizeof(*atom));
00065 atom->name = ckd_salloc(name);
00066 atom->weight = weight;
00067 return atom;
00068 }
00069
00070 int
00071 jsgf_atom_free(jsgf_atom_t *atom)
00072 {
00073 if (atom == NULL)
00074 return 0;
00075 ckd_free(atom->name);
00076 ckd_free(atom);
00077 return 0;
00078 }
00079
00080 jsgf_t *
00081 jsgf_grammar_new(jsgf_t *parent)
00082 {
00083 jsgf_t *grammar;
00084
00085 grammar = ckd_calloc(1, sizeof(*grammar));
00086
00087
00088 if (parent) {
00089 grammar->rules = parent->rules;
00090 grammar->imports = parent->imports;
00091 grammar->searchpath = parent->searchpath;
00092 grammar->parent = parent;
00093 }
00094 else {
00095 char *jsgf_path;
00096
00097 grammar->rules = hash_table_new(64, 0);
00098 grammar->imports = hash_table_new(16, 0);
00099 if ((jsgf_path = getenv("JSGF_PATH")) != NULL) {
00100 char *word, *c;
00101
00102
00103
00104 word = jsgf_path = ckd_salloc(jsgf_path);
00105 while ((c = strchr(word, ':'))) {
00106 *c = '\0';
00107 grammar->searchpath = glist_add_ptr(grammar->searchpath, word);
00108 word = c + 1;
00109 }
00110 grammar->searchpath = glist_add_ptr(grammar->searchpath, word);
00111 grammar->searchpath = glist_reverse(grammar->searchpath);
00112 }
00113 else {
00114
00115 grammar->searchpath = glist_add_ptr(grammar->searchpath, ckd_salloc("."));
00116 }
00117 }
00118
00119 return grammar;
00120 }
00121
00122 void
00123 jsgf_grammar_free(jsgf_t *jsgf)
00124 {
00125
00126 if (jsgf->parent == NULL) {
00127 hash_iter_t *itor;
00128 gnode_t *gn;
00129
00130 for (itor = hash_table_iter(jsgf->rules); itor;
00131 itor = hash_table_iter_next(itor)) {
00132 ckd_free((char *)itor->ent->key);
00133 jsgf_rule_free((jsgf_rule_t *)itor->ent->val);
00134 }
00135 hash_table_free(jsgf->rules);
00136 for (itor = hash_table_iter(jsgf->imports); itor;
00137 itor = hash_table_iter_next(itor)) {
00138 ckd_free((char *)itor->ent->key);
00139 jsgf_grammar_free((jsgf_t *)itor->ent->val);
00140 }
00141 hash_table_free(jsgf->imports);
00142 for (gn = jsgf->searchpath; gn; gn = gnode_next(gn))
00143 ckd_free(gnode_ptr(gn));
00144 glist_free(jsgf->searchpath);
00145 for (gn = jsgf->links; gn; gn = gnode_next(gn))
00146 ckd_free(gnode_ptr(gn));
00147 glist_free(jsgf->links);
00148 }
00149 ckd_free(jsgf->name);
00150 ckd_free(jsgf->version);
00151 ckd_free(jsgf->charset);
00152 ckd_free(jsgf->locale);
00153 ckd_free(jsgf);
00154 }
00155
00156 static void
00157 jsgf_rhs_free(jsgf_rhs_t *rhs)
00158 {
00159 gnode_t *gn;
00160
00161 if (rhs == NULL)
00162 return;
00163
00164 jsgf_rhs_free(rhs->alt);
00165 for (gn = rhs->atoms; gn; gn = gnode_next(gn))
00166 jsgf_atom_free(gnode_ptr(gn));
00167 glist_free(rhs->atoms);
00168 ckd_free(rhs);
00169 }
00170
00171 jsgf_atom_t *
00172 jsgf_kleene_new(jsgf_t *jsgf, jsgf_atom_t *atom, int plus)
00173 {
00174 jsgf_rule_t *rule;
00175 jsgf_atom_t *rule_atom;
00176 jsgf_rhs_t *rhs;
00177
00178
00179
00180 rhs = ckd_calloc(1, sizeof(*rhs));
00181 if (plus)
00182 rhs->atoms = glist_add_ptr(NULL, jsgf_atom_new(atom->name, 1.0));
00183 else
00184 rhs->atoms = glist_add_ptr(NULL, jsgf_atom_new("<NULL>", 1.0));
00185 rule = jsgf_define_rule(jsgf, NULL, rhs, 0);
00186 rule_atom = jsgf_atom_new(rule->name, 1.0);
00187 rhs = ckd_calloc(1, sizeof(*rhs));
00188 rhs->atoms = glist_add_ptr(NULL, rule_atom);
00189 rhs->atoms = glist_add_ptr(rhs->atoms, atom);
00190 rule->rhs->alt = rhs;
00191
00192 return jsgf_atom_new(rule->name, 1.0);
00193 }
00194
00195 jsgf_rule_t *
00196 jsgf_optional_new(jsgf_t *jsgf, jsgf_rhs_t *exp)
00197 {
00198 jsgf_rhs_t *rhs = ckd_calloc(1, sizeof(*rhs));
00199 jsgf_atom_t *atom = jsgf_atom_new("<NULL>", 1.0);
00200 rhs->alt = exp;
00201 rhs->atoms = glist_add_ptr(NULL, atom);
00202 return jsgf_define_rule(jsgf, NULL, rhs, 0);
00203 }
00204
00205 void
00206 jsgf_add_link(jsgf_t *grammar, jsgf_atom_t *atom, int from, int to)
00207 {
00208 jsgf_link_t *link;
00209
00210 link = ckd_calloc(1, sizeof(*link));
00211 link->from = from;
00212 link->to = to;
00213 link->atom = atom;
00214 grammar->links = glist_add_ptr(grammar->links, link);
00215 }
00216
00217 static char *
00218 extract_grammar_name(char *rule_name)
00219 {
00220 char* dot_pos;
00221 char* grammar_name = ckd_salloc(rule_name+1);
00222 if ((dot_pos = strrchr(grammar_name + 1, '.')) == NULL) {
00223 ckd_free(grammar_name);
00224 return NULL;
00225 }
00226 *dot_pos='\0';
00227 return grammar_name;
00228 }
00229
00230 static char *
00231 jsgf_fullname(jsgf_t *jsgf, const char *name)
00232 {
00233 char *fullname;
00234
00235
00236 if (strchr(name + 1, '.'))
00237 return ckd_salloc(name);
00238
00239
00240 fullname = ckd_malloc(strlen(jsgf->name) + strlen(name) + 4);
00241 sprintf(fullname, "<%s.%s", jsgf->name, name + 1);
00242 return fullname;
00243 }
00244
00245 static char *
00246 jsgf_fullname_from_rule(jsgf_rule_t *rule, const char *name)
00247 {
00248 char *fullname, *grammar_name;
00249
00250
00251 if (strchr(name + 1, '.'))
00252 return ckd_salloc(name);
00253
00254
00255 if ((grammar_name = extract_grammar_name(rule->name)) == NULL)
00256 return ckd_salloc(name);
00257 fullname = ckd_malloc(strlen(grammar_name) + strlen(name) + 4);
00258 sprintf(fullname, "<%s.%s", grammar_name, name + 1);
00259 ckd_free(grammar_name);
00260
00261 return fullname;
00262 }
00263
00264
00265
00266 static char *
00267 importname2rulename(char *importname)
00268 {
00269 char *rulename = ckd_salloc(importname);
00270 char *last_dotpos;
00271 char *secondlast_dotpos;
00272
00273 if ((last_dotpos = strrchr(rulename+1, '.')) != NULL) {
00274 *last_dotpos='\0';
00275 if ((secondlast_dotpos = strrchr(rulename+1, '.')) != NULL) {
00276 *last_dotpos='.';
00277 *secondlast_dotpos='<';
00278 secondlast_dotpos = ckd_salloc(secondlast_dotpos);
00279 ckd_free(rulename);
00280 return secondlast_dotpos;
00281 }
00282 else {
00283 *last_dotpos='.';
00284 return rulename;
00285 }
00286 }
00287 else {
00288 return rulename;
00289 }
00290 }
00291
00292 static int expand_rule(jsgf_t *grammar, jsgf_rule_t *rule);
00293 static int
00294 expand_rhs(jsgf_t *grammar, jsgf_rule_t *rule, jsgf_rhs_t *rhs)
00295 {
00296 gnode_t *gn;
00297 int lastnode;
00298
00299
00300 lastnode = rule->entry;
00301
00302
00303 for (gn = rhs->atoms; gn; gn = gnode_next(gn)) {
00304 jsgf_atom_t *atom = gnode_ptr(gn);
00305 if (jsgf_atom_is_rule(atom)) {
00306 jsgf_rule_t *subrule;
00307 char *fullname;
00308 gnode_t *subnode;
00309 void *val;
00310
00311
00312 if (0 == strcmp(atom->name, "<NULL>")) {
00313
00314 jsgf_add_link(grammar, atom,
00315 lastnode, grammar->nstate);
00316 lastnode = grammar->nstate;
00317 ++grammar->nstate;
00318 continue;
00319 }
00320 else if (0 == strcmp(atom->name, "<VOID>")) {
00321
00322 return -1;
00323 }
00324
00325 fullname = jsgf_fullname_from_rule(rule, atom->name);
00326 if (hash_table_lookup(grammar->rules, fullname, &val) == -1) {
00327 E_ERROR("Undefined rule in RHS: %s\n", fullname);
00328 ckd_free(fullname);
00329 return -1;
00330 }
00331 ckd_free(fullname);
00332 subrule = val;
00333
00334 for (subnode = grammar->rulestack; subnode; subnode = gnode_next(subnode))
00335 if (gnode_ptr(subnode) == (void *)subrule)
00336 break;
00337 if (subnode != NULL) {
00338
00339 if (gnode_next(gn) != NULL) {
00340 E_ERROR("Only right-recursion is permitted (in %s.%s)\n",
00341 grammar->name, rule->name);
00342 return -1;
00343 }
00344
00345 E_INFO("Right recursion %s %d => %d\n", atom->name, lastnode, subrule->entry);
00346 jsgf_add_link(grammar, atom, lastnode, subrule->entry);
00347 }
00348 else {
00349
00350 if (expand_rule(grammar, subrule) == -1)
00351 return -1;
00352
00353 jsgf_add_link(grammar, atom,
00354 lastnode, subrule->entry);
00355 lastnode = subrule->exit;
00356 }
00357 }
00358 else {
00359
00360 jsgf_add_link(grammar, atom,
00361 lastnode, grammar->nstate);
00362 lastnode = grammar->nstate;
00363 ++grammar->nstate;
00364 }
00365 }
00366
00367 return lastnode;
00368 }
00369
00370 static int
00371 expand_rule(jsgf_t *grammar, jsgf_rule_t *rule)
00372 {
00373 jsgf_rhs_t *rhs;
00374 float norm;
00375
00376
00377 grammar->rulestack = glist_add_ptr(grammar->rulestack, rule);
00378
00379
00380 norm = 0;
00381 for (rhs = rule->rhs; rhs; rhs = rhs->alt) {
00382 if (rhs->atoms) {
00383 jsgf_atom_t *atom = gnode_ptr(rhs->atoms);
00384 norm += atom->weight;
00385 }
00386 }
00387
00388 rule->entry = grammar->nstate++;
00389 rule->exit = grammar->nstate++;
00390 if (norm == 0) norm = 1;
00391 for (rhs = rule->rhs; rhs; rhs = rhs->alt) {
00392 int lastnode;
00393
00394 if (rhs->atoms) {
00395 jsgf_atom_t *atom = gnode_ptr(rhs->atoms);
00396 atom->weight /= norm;
00397 }
00398 lastnode = expand_rhs(grammar, rule, rhs);
00399 if (lastnode == -1) {
00400 return -1;
00401 }
00402 else {
00403 jsgf_add_link(grammar, NULL, lastnode, rule->exit);
00404 }
00405 }
00406
00407
00408 grammar->rulestack = gnode_free(grammar->rulestack, NULL);
00409 return rule->exit;
00410 }
00411
00412 jsgf_rule_iter_t *
00413 jsgf_rule_iter(jsgf_t *grammar)
00414 {
00415 return hash_table_iter(grammar->rules);
00416 }
00417
00418 jsgf_rule_t *
00419 jsgf_get_rule(jsgf_t *grammar, char const *name)
00420 {
00421 void *val;
00422
00423 if (hash_table_lookup(grammar->rules, name, &val) < 0)
00424 return NULL;
00425 return (jsgf_rule_t *)val;
00426 }
00427
00428 char const *
00429 jsgf_rule_name(jsgf_rule_t *rule)
00430 {
00431 return rule->name;
00432 }
00433
00434 int
00435 jsgf_rule_public(jsgf_rule_t *rule)
00436 {
00437 return rule->public;
00438 }
00439
00440 fsg_model_t *
00441 jsgf_build_fsg(jsgf_t *grammar, jsgf_rule_t *rule, logmath_t *lmath, float32 lw)
00442 {
00443 fsg_model_t *fsg;
00444 glist_t nulls;
00445 gnode_t *gn;
00446
00447
00448 for (gn = grammar->links; gn; gn = gnode_next(gn)) {
00449 ckd_free(gnode_ptr(gn));
00450 }
00451 glist_free(grammar->links);
00452 grammar->links = NULL;
00453 rule->entry = rule->exit = 0;
00454 grammar->nstate = 0;
00455 expand_rule(grammar, rule);
00456
00457 fsg = fsg_model_init(rule->name, lmath, lw, grammar->nstate);
00458 fsg->start_state = rule->entry;
00459 fsg->final_state = rule->exit;
00460 grammar->links = glist_reverse(grammar->links);
00461 for (gn = grammar->links; gn; gn = gnode_next(gn)) {
00462 jsgf_link_t *link = gnode_ptr(gn);
00463
00464 if (link->atom) {
00465 if (jsgf_atom_is_rule(link->atom)) {
00466 fsg_model_null_trans_add(fsg, link->from, link->to,
00467 logmath_log(lmath, link->atom->weight));
00468 }
00469 else {
00470 int wid = fsg_model_word_add(fsg, link->atom->name);
00471 fsg_model_trans_add(fsg, link->from, link->to,
00472 logmath_log(lmath, link->atom->weight), wid);
00473 }
00474 }
00475 else {
00476 fsg_model_null_trans_add(fsg, link->from, link->to, 0);
00477 }
00478 }
00479 nulls = fsg_model_null_trans_closure(fsg, NULL);
00480 glist_free(nulls);
00481
00482 return fsg;
00483 }
00484
00485 int
00486 jsgf_write_fsg(jsgf_t *grammar, jsgf_rule_t *rule, FILE *outfh)
00487 {
00488 gnode_t *gn;
00489
00490
00491 for (gn = grammar->links; gn; gn = gnode_next(gn)) {
00492 ckd_free(gnode_ptr(gn));
00493 }
00494 glist_free(grammar->links);
00495 grammar->links = NULL;
00496 rule->entry = rule->exit = 0;
00497 grammar->nstate = 0;
00498
00499 expand_rule(grammar, rule);
00500
00501 printf("FSG_BEGIN %s\n", rule->name);
00502 printf("NUM_STATES %d\n", grammar->nstate);
00503 printf("START_STATE %d\n", rule->entry);
00504 printf("FINAL_STATE %d\n", rule->exit);
00505 printf("\n# Transitions\n");
00506
00507 grammar->links = glist_reverse(grammar->links);
00508 for (gn = grammar->links; gn; gn = gnode_next(gn)) {
00509 jsgf_link_t *link = gnode_ptr(gn);
00510
00511 if (link->atom) {
00512 if (jsgf_atom_is_rule(link->atom)) {
00513 printf("TRANSITION %d %d %f\n",
00514 link->from, link->to,
00515 link->atom->weight);
00516 }
00517 else {
00518 printf("TRANSITION %d %d %f %s\n",
00519 link->from, link->to,
00520 link->atom->weight, link->atom->name);
00521 }
00522 }
00523 else {
00524 printf("TRANSITION %d %d %f\n", link->from, link->to, 1.0);
00525 }
00526 }
00527
00528 printf("FSG_END\n");
00529
00530 return 0;
00531 }
00532
00533 jsgf_rule_t *
00534 jsgf_define_rule(jsgf_t *jsgf, char *name, jsgf_rhs_t *rhs, int public)
00535 {
00536 jsgf_rule_t *rule;
00537 void *val;
00538
00539 if (name == NULL) {
00540 name = ckd_malloc(strlen(jsgf->name) + 16);
00541 sprintf(name, "<%s.g%05d>", jsgf->name, hash_table_inuse(jsgf->rules));
00542 }
00543 else {
00544 char *newname;
00545
00546 newname = jsgf_fullname(jsgf, name);
00547 name = newname;
00548 }
00549
00550 rule = ckd_calloc(1, sizeof(*rule));
00551 rule->refcnt = 1;
00552 rule->name = ckd_salloc(name);
00553 rule->rhs = rhs;
00554 rule->public = public;
00555
00556 E_INFO("Defined rule: %s%s\n",
00557 rule->public ? "PUBLIC " : "",
00558 rule->name);
00559 val = hash_table_enter(jsgf->rules, name, rule);
00560 if (val != (void *)rule) {
00561 E_WARN("Multiply defined symbol: %s\n", name);
00562 }
00563 return rule;
00564 }
00565
00566 jsgf_rule_t *
00567 jsgf_rule_retain(jsgf_rule_t *rule)
00568 {
00569 ++rule->refcnt;
00570 return rule;
00571 }
00572
00573 int
00574 jsgf_rule_free(jsgf_rule_t *rule)
00575 {
00576 if (rule == NULL)
00577 return 0;
00578 if (--rule->refcnt > 0)
00579 return rule->refcnt;
00580 jsgf_rhs_free(rule->rhs);
00581 ckd_free(rule->name);
00582 ckd_free(rule);
00583 return 0;
00584 }
00585
00586
00587
00588 static char *
00589 path_list_search(glist_t paths, char *path)
00590 {
00591 gnode_t *gn;
00592
00593 for (gn = paths; gn; gn = gnode_next(gn)) {
00594 char *fullpath;
00595 FILE *tmp;
00596
00597 fullpath = string_join(gnode_ptr(gn), "/", path, NULL);
00598 tmp = fopen(fullpath, "r");
00599 if (tmp != NULL) {
00600 fclose(tmp);
00601 return fullpath;
00602 }
00603 else
00604 ckd_free(fullpath);
00605 }
00606 return NULL;
00607 }
00608
00609 jsgf_rule_t *
00610 jsgf_import_rule(jsgf_t *jsgf, char *name)
00611 {
00612 char *c, *path, *newpath;
00613 size_t namelen, packlen;
00614 void *val;
00615 jsgf_t *imp;
00616 int import_all;
00617
00618
00619 namelen = strlen(name);
00620 path = ckd_malloc(namelen - 2 + 6);
00621 strcpy(path, name + 1);
00622
00623 c = strrchr(path, '.');
00624 if (c == NULL) {
00625 E_ERROR("Imported rule is not qualified: %s\n", name);
00626 ckd_free(path);
00627 return NULL;
00628 }
00629 packlen = c - path;
00630 *c = '\0';
00631
00632
00633 import_all = (strlen(name) > 2 && 0 == strcmp(name + namelen - 3, ".*>"));
00634
00635
00636 for (c = path; *c; ++c)
00637 if (*c == '.') *c = '/';
00638 strcat(path, ".gram");
00639 newpath = path_list_search(jsgf->searchpath, path);
00640 ckd_free(path);
00641 if (newpath == NULL)
00642 return NULL;
00643
00644 path = newpath;
00645 E_INFO("Importing %s from %s to %s\n", name, path, jsgf->name);
00646
00647
00648
00649
00650 if (hash_table_lookup(jsgf->imports, path, &val) == 0) {
00651 E_INFO("Already imported %s\n", path);
00652 imp = val;
00653 ckd_free(path);
00654 }
00655 else {
00656
00657 imp = jsgf_parse_file(path, jsgf);
00658 val = hash_table_enter(jsgf->imports, path, imp);
00659 if (val != (void *)imp) {
00660 E_WARN("Multiply imported file: %s\n", path);
00661 }
00662 }
00663 if (imp != NULL) {
00664 hash_iter_t *itor;
00665
00666 for (itor = hash_table_iter(imp->rules); itor;
00667 itor = hash_table_iter_next(itor)) {
00668 hash_entry_t *he = itor->ent;
00669 jsgf_rule_t *rule = hash_entry_val(he);
00670 int rule_matches;
00671 char *rule_name = importname2rulename(name);
00672
00673 if (import_all) {
00674
00675 rule_matches = !strncmp(rule_name, rule->name, packlen + 1);
00676 }
00677 else {
00678
00679 rule_matches = !strcmp(rule_name, rule->name);
00680 }
00681 ckd_free(rule_name);
00682 if (rule->public && rule_matches) {
00683 void *val;
00684 char *newname;
00685
00686
00687 c = strrchr(rule->name, '.');
00688 assert(c != NULL);
00689 newname = jsgf_fullname(jsgf, c);
00690
00691 E_INFO("Imported %s\n", newname);
00692 val = hash_table_enter(jsgf->rules, newname,
00693 jsgf_rule_retain(rule));
00694 if (val != (void *)rule) {
00695 E_WARN("Multiply defined symbol: %s\n", newname);
00696 }
00697 if (!import_all) {
00698 hash_table_iter_free(itor);
00699 return rule;
00700 }
00701 }
00702 }
00703 }
00704
00705 return NULL;
00706 }
00707
00708 jsgf_t *
00709 jsgf_parse_file(const char *filename, jsgf_t *parent)
00710 {
00711 yyscan_t yyscanner;
00712 jsgf_t *jsgf;
00713 int yyrv;
00714 FILE *in = NULL;
00715
00716 yylex_init(&yyscanner);
00717 if (filename == NULL) {
00718 yyset_in(stdin, yyscanner);
00719 }
00720 else {
00721 in = fopen(filename, "r");
00722 if (in == NULL) {
00723 fprintf(stderr, "Failed to open %s for parsing: %s\n",
00724 filename, strerror(errno));
00725 return NULL;
00726 }
00727 yyset_in(in, yyscanner);
00728 }
00729
00730 jsgf = jsgf_grammar_new(parent);
00731 yyrv = yyparse(yyscanner, jsgf);
00732 if (yyrv != 0) {
00733 fprintf(stderr, "JSGF parse of %s failed\n",
00734 filename ? filename : "(stdin)");
00735 jsgf_grammar_free(jsgf);
00736 yylex_destroy(yyscanner);
00737 return NULL;
00738 }
00739 if (in)
00740 fclose(in);
00741 yylex_destroy(yyscanner);
00742
00743 return jsgf;
00744 }