SphinxBase
0.6
|
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 00002 /* ==================================================================== 00003 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights 00004 * reserved. 00005 * 00006 * Redistribution and use in source and binary forms, with or without 00007 * modification, are permitted provided that the following conditions 00008 * are met: 00009 * 00010 * 1. Redistributions of source code must retain the above copyright 00011 * notice, this list of conditions and the following disclaimer. 00012 * 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in 00015 * the documentation and/or other materials provided with the 00016 * distribution. 00017 * 00018 * This work was supported in part by funding from the Defense Advanced 00019 * Research Projects Agency and the National Science Foundation of the 00020 * United States of America, and the CMU Sphinx Speech Consortium. 00021 * 00022 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 00023 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00024 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00025 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 00026 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00027 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00028 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00029 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00030 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00032 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00033 * 00034 * ==================================================================== 00035 * 00036 */ 00037 /* 00038 * hash.c -- Hash table module. 00039 * 00040 * ********************************************** 00041 * CMU ARPA Speech Project 00042 * 00043 * Copyright (c) 1999 Carnegie Mellon University. 00044 * ALL RIGHTS RESERVED. 00045 * ********************************************** 00046 * 00047 * HISTORY 00048 * $Log: hash.c,v $ 00049 * Revision 1.5 2005/06/22 03:04:01 arthchan2003 00050 * 1, Implemented hash_delete and hash_display, 2, Fixed doxygen documentation, 3, Added keyword. 00051 * 00052 * Revision 1.9 2005/05/25 06:17:53 archan 00053 * Delete the test code in cmd_ln.c and fixed platform specific code of hash.c 00054 * 00055 * Revision 1.8 2005/05/24 01:10:54 archan 00056 * Fix a bug when the value only appear in the hash but there is no chain. Also make sure that prev was initialized to NULL. All success cases were tested, but not tested with the deletion is tested. 00057 * 00058 * Revision 1.6 2005/05/24 00:00:45 archan 00059 * Added basic functionalities to hash_t: 1, display and 2, delete a key from a hash. \n 00060 * 00061 * Revision 1.5 2005/05/11 07:01:38 archan 00062 * Added comments on the usage of the current implementation of hash tables. 00063 * 00064 * Revision 1.4 2005/05/03 04:09:11 archan 00065 * Implemented the heart of word copy search. For every ci-phone, every word end, a tree will be allocated to preserve its pathscore. This is different from 3.5 or below, only the best score for a particular ci-phone, regardless of the word-ends will be preserved at every frame. The graph propagation will not collect unused word tree at this point. srch_WST_propagate_wd_lv2 is also as the most stupid in the century. But well, after all, everything needs a start. I will then really get the results from the search and see how it looks. 00066 * 00067 * Revision 1.3 2005/03/30 01:22:48 archan 00068 * Fixed mistakes in last updates. Add 00069 * 00070 * 00071 * 05-May-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon 00072 * Removed hash_key2hash(). Added hash_enter_bkey() and hash_lookup_bkey(), 00073 * and len attribute to hash_entry_t. 00074 * 00075 * 30-Apr-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon 00076 * Added hash_key2hash(). 00077 * 00078 * 18-Jun-97 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon 00079 * Included case sensitive/insensitive option. Removed local, static 00080 * maintenance of all hash tables. 00081 * 00082 * 31-Jul-95 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon 00083 * Created. 00084 */ 00085 00086 00087 #include <stdio.h> 00088 #include <stdlib.h> 00089 #include <string.h> 00090 #include <assert.h> 00091 00092 #ifdef _MSC_VER 00093 #pragma warning (disable: 4018) 00094 #endif 00095 00096 #include "sphinxbase/hash_table.h" 00097 #include "sphinxbase/err.h" 00098 #include "sphinxbase/ckd_alloc.h" 00099 #include "sphinxbase/case.h" 00100 00101 00102 #if 0 00103 static void 00104 prime_sieve(int32 max) 00105 { 00106 char *notprime; 00107 int32 p, pp; 00108 00109 notprime = (char *) ckd_calloc(max + 1, 1); 00110 p = 2; 00111 for (;;) { 00112 printf("%d\n", p); 00113 for (pp = p + p; pp <= max; pp += p) 00114 notprime[pp] = 1; 00115 for (++p; (p <= max) && notprime[p]; p++); 00116 if (p > max) 00117 break; 00118 } 00119 } 00120 #endif 00121 00122 00123 /* 00124 * HACK!! Initial hash table size is restricted by this set of primes. (Of course, 00125 * collision resolution by chaining will accommodate more entries indefinitely, but 00126 * efficiency will drop.) 00127 */ 00128 const int32 prime[] = { 00129 101, 211, 307, 401, 503, 601, 701, 809, 907, 00130 1009, 1201, 1601, 2003, 2411, 3001, 4001, 5003, 6007, 7001, 8009, 00131 9001, 00132 10007, 12007, 16001, 20011, 24001, 30011, 40009, 50021, 60013, 00133 70001, 80021, 90001, 00134 100003, 120011, 160001, 200003, 240007, 300007, 400009, 500009, 00135 600011, 700001, 800011, 900001, 00136 -1 00137 }; 00138 00139 00143 static int32 00144 prime_size(int32 size) 00145 { 00146 int32 i; 00147 00148 for (i = 0; (prime[i] > 0) && (prime[i] < size); i++); 00149 if (prime[i] <= 0) { 00150 E_WARN("Very large hash table requested (%d entries)\n", size); 00151 --i; 00152 } 00153 return (prime[i]); 00154 } 00155 00156 00157 hash_table_t * 00158 hash_table_new(int32 size, int32 casearg) 00159 { 00160 hash_table_t *h; 00161 00162 h = (hash_table_t *) ckd_calloc(1, sizeof(hash_table_t)); 00163 h->size = prime_size(size + (size >> 1)); 00164 h->nocase = (casearg == HASH_CASE_NO); 00165 h->table = (hash_entry_t *) ckd_calloc(h->size, sizeof(hash_entry_t)); 00166 /* The above calloc clears h->table[*].key and .next to NULL, i.e. an empty table */ 00167 00168 return h; 00169 } 00170 00171 00172 /* 00173 * Compute hash value for given key string. 00174 * Somewhat tuned for English text word strings. 00175 */ 00176 static uint32 00177 key2hash(hash_table_t * h, const char *key) 00178 { 00179 00180 register const char *cp; 00181 00191 /*register char c; */ 00192 register unsigned char c; 00193 register int32 s; 00194 register uint32 hash; 00195 00196 hash = 0; 00197 s = 0; 00198 00199 if (h->nocase) { 00200 for (cp = key; *cp; cp++) { 00201 c = *cp; 00202 c = UPPER_CASE(c); 00203 hash += c << s; 00204 s += 5; 00205 if (s >= 25) 00206 s -= 24; 00207 } 00208 } 00209 else { 00210 for (cp = key; *cp; cp++) { 00211 hash += (*cp) << s; 00212 s += 5; 00213 if (s >= 25) 00214 s -= 24; 00215 } 00216 } 00217 00218 return (hash % h->size); 00219 } 00220 00221 00222 static char * 00223 makekey(uint8 * data, int32 len, char *key) 00224 { 00225 int32 i, j; 00226 00227 if (!key) 00228 key = (char *) ckd_calloc(len * 2 + 1, sizeof(char)); 00229 00230 for (i = 0, j = 0; i < len; i++, j += 2) { 00231 key[j] = 'A' + (data[i] & 0x000f); 00232 key[j + 1] = 'J' + ((data[i] >> 4) & 0x000f); 00233 } 00234 key[j] = '\0'; 00235 00236 return key; 00237 } 00238 00239 00240 static int32 00241 keycmp_nocase(hash_entry_t * entry, const char *key) 00242 { 00243 char c1, c2; 00244 int32 i; 00245 const char *str; 00246 00247 str = entry->key; 00248 for (i = 0; i < entry->len; i++) { 00249 c1 = *(str++); 00250 c1 = UPPER_CASE(c1); 00251 c2 = *(key++); 00252 c2 = UPPER_CASE(c2); 00253 if (c1 != c2) 00254 return (c1 - c2); 00255 } 00256 00257 return 0; 00258 } 00259 00260 00261 static int32 00262 keycmp_case(hash_entry_t * entry, const char *key) 00263 { 00264 char c1, c2; 00265 int32 i; 00266 const char *str; 00267 00268 str = entry->key; 00269 for (i = 0; i < entry->len; i++) { 00270 c1 = *(str++); 00271 c2 = *(key++); 00272 if (c1 != c2) 00273 return (c1 - c2); 00274 } 00275 00276 return 0; 00277 } 00278 00279 00280 /* 00281 * Lookup entry with hash-value hash in table h for given key 00282 * Return value: hash_entry_t for key 00283 */ 00284 static hash_entry_t * 00285 lookup(hash_table_t * h, uint32 hash, const char *key, size_t len) 00286 { 00287 hash_entry_t *entry; 00288 00289 entry = &(h->table[hash]); 00290 if (entry->key == NULL) 00291 return NULL; 00292 00293 if (h->nocase) { 00294 while (entry && ((entry->len != len) 00295 || (keycmp_nocase(entry, key) != 0))) 00296 entry = entry->next; 00297 } 00298 else { 00299 while (entry && ((entry->len != len) 00300 || (keycmp_case(entry, key) != 0))) 00301 entry = entry->next; 00302 } 00303 00304 return entry; 00305 } 00306 00307 00308 int32 00309 hash_table_lookup(hash_table_t * h, const char *key, void ** val) 00310 { 00311 hash_entry_t *entry; 00312 uint32 hash; 00313 int32 len; 00314 00315 hash = key2hash(h, key); 00316 len = strlen(key); 00317 00318 entry = lookup(h, hash, key, len); 00319 if (entry) { 00320 if (val) 00321 *val = entry->val; 00322 return 0; 00323 } 00324 else 00325 return -1; 00326 } 00327 00328 int32 00329 hash_table_lookup_int32(hash_table_t * h, const char *key, int32 *val) 00330 { 00331 void *vval; 00332 int32 rv; 00333 00334 rv = hash_table_lookup(h, key, &vval); 00335 if (rv != 0) 00336 return rv; 00337 if (val) 00338 *val = (int32)(long)vval; 00339 return 0; 00340 } 00341 00342 00343 int32 00344 hash_table_lookup_bkey(hash_table_t * h, const char *key, size_t len, void ** val) 00345 { 00346 hash_entry_t *entry; 00347 uint32 hash; 00348 char *str; 00349 00350 str = makekey((uint8 *) key, len, NULL); 00351 hash = key2hash(h, str); 00352 ckd_free(str); 00353 00354 entry = lookup(h, hash, key, len); 00355 if (entry) { 00356 if (val) 00357 *val = entry->val; 00358 return 0; 00359 } 00360 else 00361 return -1; 00362 } 00363 00364 int32 00365 hash_table_lookup_bkey_int32(hash_table_t * h, const char *key, size_t len, int32 *val) 00366 { 00367 void *vval; 00368 int32 rv; 00369 00370 rv = hash_table_lookup_bkey(h, key, len, &vval); 00371 if (rv != 0) 00372 return rv; 00373 if (val) 00374 *val = (int32)(long)vval; 00375 return 0; 00376 } 00377 00378 00379 static void * 00380 enter(hash_table_t * h, uint32 hash, const char *key, size_t len, void *val, int32 replace) 00381 { 00382 hash_entry_t *cur, *new; 00383 00384 if ((cur = lookup(h, hash, key, len)) != NULL) { 00385 void *oldval; 00386 /* Key already exists. */ 00387 oldval = cur->val; 00388 if (replace) { 00389 /* Replace the pointer if replacement is requested, 00390 * because this might be a different instance of the same 00391 * string (this verges on magic, sorry) */ 00392 cur->key = key; 00393 cur->val = val; 00394 } 00395 return oldval; 00396 } 00397 00398 cur = &(h->table[hash]); 00399 if (cur->key == NULL) { 00400 /* Empty slot at hashed location; add this entry */ 00401 cur->key = key; 00402 cur->len = len; 00403 cur->val = val; 00404 00405 /* Added by ARCHAN at 20050515. This allows deletion could work. */ 00406 cur->next = NULL; 00407 00408 } 00409 else { 00410 /* Key collision; create new entry and link to hashed location */ 00411 new = (hash_entry_t *) ckd_calloc(1, sizeof(hash_entry_t)); 00412 new->key = key; 00413 new->len = len; 00414 new->val = val; 00415 new->next = cur->next; 00416 cur->next = new; 00417 } 00418 ++h->inuse; 00419 00420 return val; 00421 } 00422 00423 /* 20050523 Added by ARCHAN to delete a key from a hash table */ 00424 static void * 00425 delete(hash_table_t * h, uint32 hash, const char *key, size_t len) 00426 { 00427 hash_entry_t *entry, *prev; 00428 void *val; 00429 00430 prev = NULL; 00431 entry = &(h->table[hash]); 00432 if (entry->key == NULL) 00433 return NULL; 00434 00435 if (h->nocase) { 00436 while (entry && ((entry->len != len) 00437 || (keycmp_nocase(entry, key) != 0))) { 00438 prev = entry; 00439 entry = entry->next; 00440 } 00441 } 00442 else { 00443 while (entry && ((entry->len != len) 00444 || (keycmp_case(entry, key) != 0))) { 00445 prev = entry; 00446 entry = entry->next; 00447 } 00448 } 00449 00450 if (entry == NULL) 00451 return NULL; 00452 00453 /* At this point, entry will be the one required to be deleted, prev 00454 will contain the previous entry 00455 */ 00456 val = entry->val; 00457 00458 if (prev == NULL) { 00459 /* That is to say the entry in the hash table (not the chain) matched the key. */ 00460 /* We will then copy the things from the next entry to the hash table */ 00461 prev = entry; 00462 if (entry->next) { /* There is a next entry, great, copy it. */ 00463 entry = entry->next; 00464 prev->key = entry->key; 00465 prev->len = entry->len; 00466 prev->val = entry->val; 00467 prev->next = entry->next; 00468 ckd_free(entry); 00469 } 00470 else { /* There is not a next entry, just set the key to null */ 00471 prev->key = NULL; 00472 prev->len = 0; 00473 prev->next = NULL; 00474 } 00475 00476 } 00477 else { /* This case is simple */ 00478 prev->next = entry->next; 00479 ckd_free(entry); 00480 } 00481 00482 /* Do wiring and free the entry */ 00483 00484 --h->inuse; 00485 00486 return val; 00487 } 00488 00489 void 00490 hash_table_empty(hash_table_t *h) 00491 { 00492 hash_entry_t *e, *e2; 00493 int32 i; 00494 00495 for (i = 0; i < h->size; i++) { 00496 /* Free collision lists. */ 00497 for (e = h->table[i].next; e; e = e2) { 00498 e2 = e->next; 00499 ckd_free((void *) e); 00500 } 00501 memset(&h->table[i], 0, sizeof(h->table[i])); 00502 } 00503 h->inuse = 0; 00504 } 00505 00506 00507 void * 00508 hash_table_enter(hash_table_t * h, const char *key, void *val) 00509 { 00510 uint32 hash; 00511 size_t len; 00512 00513 hash = key2hash(h, key); 00514 len = strlen(key); 00515 return (enter(h, hash, key, len, val, 0)); 00516 } 00517 00518 void * 00519 hash_table_replace(hash_table_t * h, const char *key, void *val) 00520 { 00521 uint32 hash; 00522 size_t len; 00523 00524 hash = key2hash(h, key); 00525 len = strlen(key); 00526 return (enter(h, hash, key, len, val, 1)); 00527 } 00528 00529 void * 00530 hash_table_delete(hash_table_t * h, const char *key) 00531 { 00532 uint32 hash; 00533 size_t len; 00534 00535 hash = key2hash(h, key); 00536 len = strlen(key); 00537 00538 return (delete(h, hash, key, len)); 00539 } 00540 00541 void * 00542 hash_table_enter_bkey(hash_table_t * h, const char *key, size_t len, void *val) 00543 { 00544 uint32 hash; 00545 char *str; 00546 00547 str = makekey((uint8 *) key, len, NULL); 00548 hash = key2hash(h, str); 00549 ckd_free(str); 00550 00551 return (enter(h, hash, key, len, val, 0)); 00552 } 00553 00554 void * 00555 hash_table_replace_bkey(hash_table_t * h, const char *key, size_t len, void *val) 00556 { 00557 uint32 hash; 00558 char *str; 00559 00560 str = makekey((uint8 *) key, len, NULL); 00561 hash = key2hash(h, str); 00562 ckd_free(str); 00563 00564 return (enter(h, hash, key, len, val, 1)); 00565 } 00566 00567 void * 00568 hash_table_delete_bkey(hash_table_t * h, const char *key, size_t len) 00569 { 00570 uint32 hash; 00571 char *str; 00572 00573 str = makekey((uint8 *) key, len, NULL); 00574 hash = key2hash(h, str); 00575 00576 return (delete(h, hash, key, len)); 00577 } 00578 00579 void 00580 hash_table_display(hash_table_t * h, int32 showdisplay) 00581 { 00582 hash_entry_t *e; 00583 int i, j; 00584 j = 0; 00585 00586 E_INFOCONT("Hash with chaining representation of the hash table\n"); 00587 00588 for (i = 0; i < h->size; i++) { 00589 e = &(h->table[i]); 00590 if (e->key != NULL) { 00591 E_INFOCONT("|key:"); 00592 if (showdisplay) 00593 E_INFOCONT("%s", e->key); 00594 else 00595 E_INFOCONT("%p", e->key); 00596 00597 E_INFOCONT("|len:%d|val=%ld|->", e->len, (long)e->val); 00598 if (e->next == NULL) { 00599 E_INFOCONT("NULL\n"); 00600 } 00601 j++; 00602 00603 for (e = e->next; e; e = e->next) { 00604 E_INFOCONT("|key:"); 00605 if (showdisplay) 00606 E_INFOCONT("%s", e->key); 00607 00608 E_INFOCONT("|len:%d|val=%ld|->", e->len, (long)e->val); 00609 if (e->next == NULL) { 00610 E_INFOCONT("NULL\n"); 00611 } 00612 j++; 00613 } 00614 } 00615 } 00616 00617 E_INFOCONT("The total number of keys =%d\n", j); 00618 } 00619 00620 00621 glist_t 00622 hash_table_tolist(hash_table_t * h, int32 * count) 00623 { 00624 glist_t g; 00625 hash_entry_t *e; 00626 int32 i, j; 00627 00628 g = NULL; 00629 00630 j = 0; 00631 for (i = 0; i < h->size; i++) { 00632 e = &(h->table[i]); 00633 00634 if (e->key != NULL) { 00635 g = glist_add_ptr(g, (void *) e); 00636 j++; 00637 00638 for (e = e->next; e; e = e->next) { 00639 g = glist_add_ptr(g, (void *) e); 00640 j++; 00641 } 00642 } 00643 } 00644 00645 if (count) 00646 *count = j; 00647 00648 return g; 00649 } 00650 00651 hash_iter_t * 00652 hash_table_iter(hash_table_t *h) 00653 { 00654 hash_iter_t *itor; 00655 00656 itor = ckd_calloc(1, sizeof(*itor)); 00657 itor->ht = h; 00658 return hash_table_iter_next(itor); 00659 } 00660 00661 hash_iter_t * 00662 hash_table_iter_next(hash_iter_t *itor) 00663 { 00664 /* If there is an entry, walk down its list. */ 00665 if (itor->ent) 00666 itor->ent = itor->ent->next; 00667 /* If we got to the end of the chain, or we had no entry, scan 00668 * forward in the table to find the next non-empty bucket. */ 00669 if (itor->ent == NULL) { 00670 while (itor->idx < itor->ht->size 00671 && itor->ht->table[itor->idx].key == NULL) 00672 ++itor->idx; 00673 /* If we did not find one then delete the iterator and 00674 * return NULL. */ 00675 if (itor->idx == itor->ht->size) { 00676 hash_table_iter_free(itor); 00677 return NULL; 00678 } 00679 /* Otherwise use this next entry. */ 00680 itor->ent = itor->ht->table + itor->idx; 00681 /* Increase idx for the next time around. */ 00682 ++itor->idx; 00683 } 00684 return itor; 00685 } 00686 00687 void 00688 hash_table_iter_free(hash_iter_t *itor) 00689 { 00690 ckd_free(itor); 00691 } 00692 00693 void 00694 hash_table_free(hash_table_t * h) 00695 { 00696 hash_entry_t *e, *e2; 00697 int32 i; 00698 00699 if (h == NULL) 00700 return; 00701 00702 /* Free additional entries created for key collision cases */ 00703 for (i = 0; i < h->size; i++) { 00704 for (e = h->table[i].next; e; e = e2) { 00705 e2 = e->next; 00706 ckd_free((void *) e); 00707 } 00708 } 00709 00710 ckd_free((void *) h->table); 00711 ckd_free((void *) h); 00712 }