00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043 #include "lm3g_model.h"
00044 #include "listelem_alloc.h"
00045 #include "ckd_alloc.h"
00046
00047 #include <string.h>
00048 #include <assert.h>
00049
00050 void
00051 lm3g_tginfo_free(ngram_model_t *base, lm3g_model_t *lm3g)
00052 {
00053 if (lm3g->tginfo == NULL)
00054 return;
00055 listelem_alloc_free(lm3g->le);
00056 ckd_free(lm3g->tginfo);
00057 }
00058
00059 void
00060 lm3g_tginfo_reset(ngram_model_t *base, lm3g_model_t *lm3g)
00061 {
00062 if (lm3g->tginfo == NULL)
00063 return;
00064 listelem_alloc_free(lm3g->le);
00065 memset(lm3g->tginfo, 0, base->n_counts[0] * sizeof(tginfo_t *));
00066 lm3g->le = listelem_alloc_init(sizeof(tginfo_t));
00067 }
00068
00069 void
00070 lm3g_apply_weights(ngram_model_t *base,
00071 lm3g_model_t *lm3g,
00072 float32 lw, float32 wip, float32 uw)
00073 {
00074 int32 log_wip, log_uw, log_uniform_weight;
00075 int i;
00076
00077
00078 log_wip = logmath_log(base->lmath, wip);
00079 log_uw = logmath_log(base->lmath, uw);
00080 log_uniform_weight = logmath_log(base->lmath, 1.0 - uw);
00081
00082 for (i = 0; i < base->n_counts[0]; ++i) {
00083 int32 prob1, bo_wt, n_used;
00084
00085
00086 bo_wt = (int32)(lm3g->unigrams[i].bo_wt1.l / base->lw);
00087
00088
00089 prob1 = ngram_ng_prob(base, i, NULL, 0, &n_used);
00090
00091 lm3g->unigrams[i].bo_wt1.l = (int32)(bo_wt * lw);
00092 if (strcmp(base->word_str[i], "<s>") == 0) {
00093
00094 lm3g->unigrams[i].prob1.l = (int32)(prob1 * lw) + log_wip;
00095 }
00096 else {
00097
00098 prob1 += log_uw;
00099 prob1 = logmath_add(base->lmath, prob1, base->log_uniform + log_uniform_weight);
00100
00101 lm3g->unigrams[i].prob1.l = (int32)(prob1 * lw) + log_wip;
00102 }
00103 }
00104
00105 for (i = 0; i < lm3g->n_prob2; ++i) {
00106 int32 prob2;
00107
00108 prob2 = (int32)((lm3g->prob2[i].l - base->log_wip) / base->lw);
00109 lm3g->prob2[i].l = (int32)(prob2 * lw) + log_wip;
00110 }
00111
00112 if (base->n > 2) {
00113 for (i = 0; i < lm3g->n_bo_wt2; ++i) {
00114 lm3g->bo_wt2[i].l = (int32)(lm3g->bo_wt2[i].l / base->lw * lw);
00115 }
00116 for (i = 0; i < lm3g->n_prob3; i++) {
00117 int32 prob3;
00118
00119 prob3 = (int32)((lm3g->prob3[i].l - base->log_wip) / base->lw);
00120 lm3g->prob3[i].l = (int32)(prob3 * lw) + log_wip;
00121 }
00122 }
00123
00124
00125 base->log_wip = log_wip;
00126 base->log_uw = log_uw;
00127 base->log_uniform_weight = log_uniform_weight;
00128 base->lw = lw;
00129 }
00130
00131 int32
00132 lm3g_add_ug(ngram_model_t *base,
00133 lm3g_model_t *lm3g, int32 wid, int32 lweight)
00134 {
00135 int32 score;
00136
00137
00138 assert(!NGRAM_IS_CLASSWID(wid));
00139
00140
00141 lm3g->unigrams = ckd_realloc(lm3g->unigrams,
00142 sizeof(*lm3g->unigrams) * base->n_1g_alloc);
00143 memset(lm3g->unigrams + base->n_counts[0], 0,
00144 (base->n_1g_alloc - base->n_counts[0]) * sizeof(*lm3g->unigrams));
00145
00146 lm3g->tginfo = ckd_realloc(lm3g->tginfo,
00147 sizeof(*lm3g->tginfo) * base->n_1g_alloc);
00148 memset(lm3g->tginfo + base->n_counts[0], 0,
00149 (base->n_1g_alloc - base->n_counts[0]) * sizeof(*lm3g->tginfo));
00150
00151
00152
00153
00154
00155 score = lweight + base->log_uniform + base->log_uw;
00156 score = logmath_add(base->lmath, score,
00157 base->log_uniform + base->log_uniform_weight);
00158 lm3g->unigrams[wid].prob1.l = score;
00159
00160
00161 lm3g->unigrams[wid].bo_wt1.l = logmath_get_zero(base->lmath);
00162 lm3g->unigrams[wid].bigrams = 0;
00163
00164 ++base->n_counts[0];
00165
00166
00167
00168
00169 if (wid >= base->n_counts[0])
00170 base->n_counts[0] = wid + 1;
00171
00172 return score;
00173 }