00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043 #ifndef __NGRAM_MODEL_INTERNAL_H__
00044 #define __NGRAM_MODEL_INTERNAL_H__
00045
00046 #include "ngram_model.h"
00047 #include "hash_table.h"
00048
/**
 * Layout of struct ngram_model_s, the object that the function table and
 * prototypes in this header take as ngram_model_t (the typedef itself is not
 * visible here; presumably it lives in ngram_model.h).
 *
 * NOTE(review): the per-field notes below are inferred from field names and
 * types only -- confirm each against the implementation files before relying
 * on it.
 */
00055 struct ngram_model_s {
00056 int refcount; /**< Reference count (presumably). */
00057 int32 *n_counts; /**< Presumably an array of N-gram counts, one per order -- TODO confirm length. */
00058 int32 n_1g_alloc; /**< Presumably the number of unigram/word slots currently allocated. */
00059 int32 n_words; /**< Presumably the number of word strings actually in use. */
00061 uint8 n; /**< Presumably the order of the model (the N in N-gram). */
00062 uint8 n_classes; /**< Presumably the number of entries in classes. */
00063 uint8 writable; /**< Flag; presumably whether the word strings may be modified. */
00064 uint8 flags; /**< Additional flag bits; meaning not visible here. */
00066 logmath_t *lmath; /**< Log-math object associated with this model. */
00067 float32 lw; /**< Presumably the language weight (cf. apply_weights' lw argument). */
00068 int32 log_wip; /**< Presumably the word insertion penalty, in the log domain. */
00069 int32 log_uw; /**< Presumably the unigram weight, in the log domain. */
00070 int32 log_uniform; /**< Presumably the uniform probability, in the log domain. */
00071 int32 log_uniform_weight; /**< Presumably the weight applied to the uniform probability, in the log domain. */
00072 int32 log_zero; /**< Presumably the log-domain representation of zero probability. */
00073 char **word_str; /**< Array of word strings (presumably indexed by word ID). */
00074 hash_table_t *wid; /**< Hash table; presumably maps word strings to word IDs. */
00075 struct ngram_class_s **classes; /**< Array of pointers to word class definitions. */
00076 struct ngram_funcs_s *funcs; /**< Table of implementation-specific function pointers. */
00077 };
00078
/**
 * Layout of a word class: a fixed probability table (prob1) plus a
 * separately allocated array of ngram_hash_s entries (nword_hash).
 *
 * NOTE(review): field meanings are inferred from names and types only;
 * confirm against the class implementation.
 */
00082 struct ngram_class_s {
00083 int32 tag_wid; /**< Presumably the word ID of the class tag itself. */
00084 int32 start_wid; /**< Presumably the first word ID covered by prob1. */
00085 int32 n_words; /**< Presumably the number of entries in prob1. */
00086 int32 *prob1; /**< Per-word probability table (presumably log-domain). */
/** Entry type for nword_hash; entries are linked by integer index via next. */
00090 struct ngram_hash_s {
00091 int32 wid; /**< Word ID stored in this entry. */
00092 int32 prob1; /**< Probability stored for wid. */
00093 int32 next; /**< Index of the next entry in the chain; end-of-chain sentinel not visible here. */
00094 } *nword_hash;
00095 int32 n_hash; /**< Presumably the number of entries allocated in nword_hash. */
00096 int32 n_hash_inuse; /**< Presumably the number of nword_hash entries in use. */
00097 };
00098
/* NOTE(review): presumably the initial size of a class's nword_hash table -- confirm at the use site. */
00099 #define NGRAM_HASH_SIZE 128
00100
/*
 * Word ID bit layout, as implied by the masks and shifts below:
 *   bit  31     : set when the ID is a class word ID
 *   bits 24..30 : 7-bit class ID
 *   bits  0..23 : 24-bit base word ID
 */
/* Extract the low 24 bits (base word ID) of wid. */
00101 #define NGRAM_BASEWID(wid) ((wid)&0xffffff)
/* Extract the 7-bit class ID held in bits 24..30 of wid. */
00102 #define NGRAM_CLASSID(wid) (((wid)>>24) & 0x7f)
/*
 * Build a class word ID: classid goes to bits 24 and up, bit 31 is set, and
 * wid is OR'ed in as-is. Neither argument is masked, so the caller must pass
 * a wid that fits in 24 bits and a classid that fits in 7 bits.
 */
00103 #define NGRAM_CLASSWID(wid,classid) (((classid)<<24) | 0x80000000 | (wid))
/* Non-zero when bit 31 of wid is set; the result is the masked bit, not 0/1. */
00104 #define NGRAM_IS_CLASSWID(wid) ((wid)&0x80000000)
00105
/* NOTE(review): presumably the growth increment used when enlarging unigram storage -- confirm at the use site. */
00106 #define UG_ALLOC_STEP 10
00107
/**
 * Table of function pointers that each take an ngram_model_t as their first
 * argument. struct ngram_model_s refers to a table of this type through its
 * funcs member.
 *
 * NOTE(review): the per-member notes are inferred from names and signatures;
 * the contracts (return codes, ownership) are defined by the implementations,
 * which are not visible here.
 */
00109 typedef struct ngram_funcs_s {
/** Presumably releases implementation-specific resources held by model. */
00113 void (*free)(ngram_model_t *model);
/**
 * Presumably applies a language weight (lw), word insertion penalty (wip)
 * and unigram weight (uw) to model. Returns int; meaning not visible here.
 */
00117 int (*apply_weights)(ngram_model_t *model,
00118 float32 lw,
00119 float32 wip,
00120 float32 uw);
/**
 * Presumably scores word wid given n_hist word IDs in history; n_used looks
 * like an output parameter -- TODO confirm what it receives.
 */
00124 int32 (*score)(ngram_model_t *model,
00125 int32 wid,
00126 int32 *history,
00127 int32 n_hist,
00128 int32 *n_used);
/**
 * Same signature as score; presumably returns the score without the weights
 * set by apply_weights -- confirm.
 */
00133 int32 (*raw_score)(ngram_model_t *model,
00134 int32 wid,
00135 int32 *history,
00136 int32 n_hist,
00137 int32 *n_used);
/**
 * Presumably adds a unigram for wid with weight lweight; the meaning of the
 * int32 return value is not visible here.
 */
00149 int32 (*add_ug)(ngram_model_t *model,
00150 int32 wid, int32 lweight);
/** Presumably discards cached or intermediate state held for model. */
00154 void (*flush)(ngram_model_t *model);
00155 } ngram_funcs_t;
00156
/**
 * A word class definition: an array of word strings, an array of float32
 * weights, and a count.
 *
 * NOTE(review): presumably words and weights are parallel arrays of n_words
 * entries each -- confirm against read_classdef_file().
 */
00160 typedef struct classdef_s {
00161 char **words; /**< Word strings belonging to the class. */
00162 float32 *weights; /**< Weights; presumably one per entry in words. */
00163 int32 n_words; /**< Presumably the number of entries in words and weights. */
00164 } classdef_t;
00165
/**
 * Initialize a model object.
 *
 * NOTE(review): parameter notes are inferred from names; confirm against the
 * definition.
 *
 * @param model     Model to initialize.
 * @param funcs     Function table for the model (cf. the funcs member of
 *                  struct ngram_model_s).
 * @param lmath     Log-math object for the model.
 * @param n         Presumably the N-gram order.
 * @param n_unigram Presumably the number of unigrams to allocate space for.
 * @return int32; meaning of the value is not visible here.
 */
00169 int32
00170 ngram_model_init(ngram_model_t *model,
00171 ngram_funcs_t *funcs,
00172 logmath_t *lmath,
00173 int32 n, int32 n_unigram);
00174
/*
 * Format-specific model readers. All three share one signature: a
 * configuration object, a file name and a log-math object in, an
 * ngram_model_t pointer out.
 *
 * NOTE(review): the formats are inferred from the function names ("arpa",
 * "dmp", "dmp32"); the failure return value (presumably NULL) and the
 * ownership of the returned model are not visible here.
 */
/** Read a model from file_name, presumably in ARPA text format. */
00178 ngram_model_t *ngram_model_arpa_read(cmd_ln_t *config,
00179 const char *file_name,
00180 logmath_t *lmath);
/** Read a model from file_name, presumably in the "DMP" format. */
00184 ngram_model_t *ngram_model_dmp_read(cmd_ln_t *config,
00185 const char *file_name,
00186 logmath_t *lmath);
/** Read a model from file_name, presumably in the "DMP32" format. */
00190 ngram_model_t *ngram_model_dmp32_read(cmd_ln_t *config,
00191 const char *file_name,
00192 logmath_t *lmath);
00193
/*
 * Format-specific model writers. All three take the model and a destination
 * file name and return int.
 *
 * NOTE(review): the formats are inferred from the function names; the return
 * value convention is not visible here.
 */
/** Write model to file_name, presumably in ARPA text format. */
00197 int ngram_model_arpa_write(ngram_model_t *model,
00198 const char *file_name);
/** Write model to file_name, presumably in the "DMP" format. */
00202 int ngram_model_dmp_write(ngram_model_t *model,
00203 const char *file_name);
/** Write model to file_name, presumably in the "DMP32" format. */
00207 int ngram_model_dmp32_write(ngram_model_t *model,
00208 const char *file_name);
00209
/**
 * Read class definitions from classdef_file into the classes hash table.
 *
 * NOTE(review): presumably the table maps class names to classdef_t objects;
 * the return value convention is not visible here.
 */
00213 int32 read_classdef_file(hash_table_t *classes, const char *classdef_file);
00214
/**
 * Free a class definition. NOTE(review): presumably this also frees the
 * words and weights arrays it holds -- confirm against the definition.
 */
00218 void classdef_free(classdef_t *classdef);
00219
/**
 * Create a word class for model.
 *
 * NOTE(review): parameter notes are inferred from names; the element type of
 * classwords and the failure return value are not visible here.
 *
 * @param model      Model the class belongs to.
 * @param tag_wid    Presumably the word ID of the class tag.
 * @param start_wid  Presumably the first word ID of the class's words.
 * @param classwords List describing the words in the class.
 * @return Pointer to the new class.
 */
00223 ngram_class_t *ngram_class_new(ngram_model_t *model, int32 tag_wid,
00224 int32 start_wid, glist_t classwords);
00225
/** Free a word class, presumably one created by ngram_class_new(). */
00229 void ngram_class_free(ngram_class_t *lmclass);
00230
/**
 * Look up word wid in class lmclass and return an int32.
 * NOTE(review): presumably a log-domain in-class probability; the value
 * returned when wid is not in the class is not visible here.
 */
00236 int32 ngram_class_prob(ngram_class_t *lmclass, int32 wid);
00237
00238 #endif