00001
00013 #include <string.h>
00014 #include "error.h"
00015 #include "SFMT.h"
00016 #include "SFMT-params.h"
00017
00018 #if defined(__BIG_ENDIAN__) && !defined(__amd64) && !defined(BIG_ENDIAN64)
00019 #define BIG_ENDIAN64 1
00020 #endif
00021 #if defined(HAVE_ALTIVEC) && !defined(BIG_ENDIAN64)
00022 #define BIG_ENDIAN64 1
00023 #endif
00024 #if defined(ONLY64) && !defined(BIG_ENDIAN64)
00025 #if defined(__GNUC__)
00026 #error "-DONLY64 must be specified with -DBIG_ENDIAN64"
00027 #endif
00028 #undef ONLY64
00029 #endif
00030
00031
00032
00033 #if defined(HAVE_ALTIVEC)
00034 #if !defined(__APPLE__)
00035 #include <altivec.h>
00036 #endif
00037
00038 union W128_T {
00039 vector unsigned int s;
00040 uint32_t u[4];
00041 };
00043 typedef union W128_T w128_t;
00044
00045 #elif defined(LUX_USE_SSE)
00046 #include <emmintrin.h>
00047
00049 union W128_T {
00050 __m128i si;
00051 uint32_t u[4];
00052 };
00054 typedef union W128_T w128_t;
00055
00056 #else
00057
00059 struct W128_T {
00060 uint32_t u[4];
00061 };
00063 typedef struct W128_T w128_t;
00064
00065 #endif
00066
00067
00068
00069
00070
00072 static w128_t sfmt[N];
00074 static uint32_t *psfmt32 = &sfmt[0].u[0];
00075 #if !defined(BIG_ENDIAN64) || defined(ONLY64)
00076
00077 static uint64_t *psfmt64 = (uint64_t *)&sfmt[0].u[0];
00078 #endif
00079
00080 static int idx;
00083 static int initialized = 0;
00085 static uint32_t parity[4] = {PARITY1, PARITY2, PARITY3, PARITY4};
00086
00087
00088
00089
00090 inline static int idxof(int i);
00091 inline static void rshift128(w128_t *out, w128_t const *in, int shift);
00092 inline static void lshift128(w128_t *out, w128_t const *in, int shift);
00093 inline static void gen_rand_all(void);
00094 inline static void gen_rand_array(w128_t *array, int size);
00095 inline static uint32_t func1(uint32_t x);
00096 inline static uint32_t func2(uint32_t x);
00097 static void period_certification(void);
00098 #if defined(BIG_ENDIAN64) && !defined(ONLY64)
00099 inline static void swap(w128_t *array, int size);
00100 #endif
00101
00102 #if defined(HAVE_ALTIVEC)
00103 #include "SFMT-alti.h"
00104 #elif defined(LUX_USE_SSE)
00105 #include "SFMT-sse2.h"
00106 #endif
00107
00112 #ifdef ONLY64
00113 inline static int idxof(int i) {
00114 return i ^ 1;
00115 }
00116 #else
00117 inline static int idxof(int i) {
00118 return i;
00119 }
00120 #endif
00121
00129 #ifdef ONLY64
00130 inline static void rshift128(w128_t *out, w128_t const *in, int shift) {
00131 uint64_t th, tl, oh, ol;
00132
00133 th = ((uint64_t)in->u[2] << 32) | ((uint64_t)in->u[3]);
00134 tl = ((uint64_t)in->u[0] << 32) | ((uint64_t)in->u[1]);
00135
00136 oh = th >> (shift * 8);
00137 ol = tl >> (shift * 8);
00138 ol |= th << (64 - shift * 8);
00139 out->u[0] = (uint32_t)(ol >> 32);
00140 out->u[1] = (uint32_t)ol;
00141 out->u[2] = (uint32_t)(oh >> 32);
00142 out->u[3] = (uint32_t)oh;
00143 }
00144 #else
00145 inline static void rshift128(w128_t *out, w128_t const *in, int shift) {
00146 uint64_t th, tl, oh, ol;
00147
00148 th = ((uint64_t)in->u[3] << 32) | ((uint64_t)in->u[2]);
00149 tl = ((uint64_t)in->u[1] << 32) | ((uint64_t)in->u[0]);
00150
00151 oh = th >> (shift * 8);
00152 ol = tl >> (shift * 8);
00153 ol |= th << (64 - shift * 8);
00154 out->u[1] = (uint32_t)(ol >> 32);
00155 out->u[0] = (uint32_t)ol;
00156 out->u[3] = (uint32_t)(oh >> 32);
00157 out->u[2] = (uint32_t)oh;
00158 }
00159 #endif
00160
00168 #ifdef ONLY64
00169 inline static void lshift128(w128_t *out, w128_t const *in, int shift) {
00170 uint64_t th, tl, oh, ol;
00171
00172 th = ((uint64_t)in->u[2] << 32) | ((uint64_t)in->u[3]);
00173 tl = ((uint64_t)in->u[0] << 32) | ((uint64_t)in->u[1]);
00174
00175 oh = th << (shift * 8);
00176 ol = tl << (shift * 8);
00177 oh |= tl >> (64 - shift * 8);
00178 out->u[0] = (uint32_t)(ol >> 32);
00179 out->u[1] = (uint32_t)ol;
00180 out->u[2] = (uint32_t)(oh >> 32);
00181 out->u[3] = (uint32_t)oh;
00182 }
00183 #else
00184 inline static void lshift128(w128_t *out, w128_t const *in, int shift) {
00185 uint64_t th, tl, oh, ol;
00186
00187 th = ((uint64_t)in->u[3] << 32) | ((uint64_t)in->u[2]);
00188 tl = ((uint64_t)in->u[1] << 32) | ((uint64_t)in->u[0]);
00189
00190 oh = th << (shift * 8);
00191 ol = tl << (shift * 8);
00192 oh |= tl >> (64 - shift * 8);
00193 out->u[1] = (uint32_t)(ol >> 32);
00194 out->u[0] = (uint32_t)ol;
00195 out->u[3] = (uint32_t)(oh >> 32);
00196 out->u[2] = (uint32_t)oh;
00197 }
00198 #endif
00199
00208 #if (!defined(HAVE_ALTIVEC)) && (!defined(LUX_USE_SSE))
00209 #ifdef ONLY64
00210 inline static void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c,
00211 w128_t *d) {
00212 w128_t x;
00213 w128_t y;
00214
00215 lshift128(&x, a, SL2);
00216 rshift128(&y, c, SR2);
00217 r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SR1) & MSK2) ^ y.u[0]
00218 ^ (d->u[0] << SL1);
00219 r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SR1) & MSK1) ^ y.u[1]
00220 ^ (d->u[1] << SL1);
00221 r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SR1) & MSK4) ^ y.u[2]
00222 ^ (d->u[2] << SL1);
00223 r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SR1) & MSK3) ^ y.u[3]
00224 ^ (d->u[3] << SL1);
00225 }
00226 #else
00227 inline static void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c,
00228 w128_t *d) {
00229 w128_t x;
00230 w128_t y;
00231
00232 lshift128(&x, a, SL2);
00233 rshift128(&y, c, SR2);
00234 r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SR1) & MSK1) ^ y.u[0]
00235 ^ (d->u[0] << SL1);
00236 r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SR1) & MSK2) ^ y.u[1]
00237 ^ (d->u[1] << SL1);
00238 r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SR1) & MSK3) ^ y.u[2]
00239 ^ (d->u[2] << SL1);
00240 r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SR1) & MSK4) ^ y.u[3]
00241 ^ (d->u[3] << SL1);
00242 }
00243 #endif
00244 #endif
00245
00246 #if (!defined(HAVE_ALTIVEC)) && (!defined(LUX_USE_SSE))
00247
00251 inline static void gen_rand_all(void) {
00252 int i;
00253 w128_t *r1, *r2;
00254
00255 r1 = &sfmt[N - 2];
00256 r2 = &sfmt[N - 1];
00257 for (i = 0; i < N - POS1; i++) {
00258 do_recursion(&sfmt[i], &sfmt[i], &sfmt[i + POS1], r1, r2);
00259 r1 = r2;
00260 r2 = &sfmt[i];
00261 }
00262 for (; i < N; i++) {
00263 do_recursion(&sfmt[i], &sfmt[i], &sfmt[i + POS1 - N], r1, r2);
00264 r1 = r2;
00265 r2 = &sfmt[i];
00266 }
00267 }
00268
00276 inline static void gen_rand_array(w128_t *array, int size) {
00277 int i, j;
00278 w128_t *r1, *r2;
00279
00280 r1 = &sfmt[N - 2];
00281 r2 = &sfmt[N - 1];
00282 for (i = 0; i < N - POS1; i++) {
00283 do_recursion(&array[i], &sfmt[i], &sfmt[i + POS1], r1, r2);
00284 r1 = r2;
00285 r2 = &array[i];
00286 }
00287 for (; i < N; i++) {
00288 do_recursion(&array[i], &sfmt[i], &array[i + POS1 - N], r1, r2);
00289 r1 = r2;
00290 r2 = &array[i];
00291 }
00292 for (; i < size - N; i++) {
00293 do_recursion(&array[i], &array[i - N], &array[i + POS1 - N], r1, r2);
00294 r1 = r2;
00295 r2 = &array[i];
00296 }
00297 for (j = 0; j < 2 * N - size; j++) {
00298 sfmt[j] = array[j + size - N];
00299 }
00300 for (; i < size; i++, j++) {
00301 do_recursion(&array[i], &array[i - N], &array[i + POS1 - N], r1, r2);
00302 r1 = r2;
00303 r2 = &array[i];
00304 sfmt[j] = array[i];
00305 }
00306 }
00307 #endif
00308
00309 #if defined(BIG_ENDIAN64) && !defined(ONLY64) && !defined(HAVE_ALTIVEC)
00310 inline static void swap(w128_t *array, int size) {
00311 int i;
00312 uint32_t x, y;
00313
00314 for (i = 0; i < size; i++) {
00315 x = array[i].u[0];
00316 y = array[i].u[2];
00317 array[i].u[0] = array[i].u[1];
00318 array[i].u[2] = array[i].u[3];
00319 array[i].u[1] = x;
00320 array[i].u[3] = y;
00321 }
00322 }
00323 #endif
00324
00330 static uint32_t func1(uint32_t x) {
00331 return (x ^ (x >> 27)) * (uint32_t)1664525UL;
00332 }
00333
00340 static uint32_t func2(uint32_t x) {
00341 return (x ^ (x >> 27)) * (uint32_t)1566083941UL;
00342 }
00343
00347 static void period_certification(void) {
00348 int inner = 0;
00349 int i, j;
00350 uint32_t work;
00351
00352 for (i = 0; i < 4; i++)
00353 inner ^= psfmt32[idxof(i)] & parity[i];
00354 for (i = 16; i > 0; i >>= 1)
00355 inner ^= inner >> i;
00356 inner &= 1;
00357
00358 if (inner == 1) {
00359 return;
00360 }
00361
00362 for (i = 0; i < 4; i++) {
00363 work = 1;
00364 for (j = 0; j < 32; j++) {
00365 if ((work & parity[i]) != 0) {
00366 psfmt32[idxof(i)] ^= work;
00367 return;
00368 }
00369 work = work << 1;
00370 }
00371 }
00372 }
00373
00374
00375
00376
00382 const char *get_idstring(void) {
00383 return IDSTR;
00384 }
00385
00391 int get_min_array_size32(void) {
00392 return N32;
00393 }
00394
00400 int get_min_array_size64(void) {
00401 return N64;
00402 }
00403
00404 #ifndef ONLY64
00405
00410 uint32_t gen_rand32(void) {
00411 uint32_t r;
00412
00413 BOOST_ASSERT(initialized);
00414 if (idx >= N32) {
00415 gen_rand_all();
00416 idx = 0;
00417 }
00418 r = psfmt32[idx++];
00419 return r;
00420 }
00421 #endif
00422
00429 uint64_t gen_rand64(void) {
00430 #if defined(BIG_ENDIAN64) && !defined(ONLY64)
00431 uint32_t r1, r2;
00432 #else
00433 uint64_t r;
00434 #endif
00435
00436 BOOST_ASSERT(initialized);
00437 BOOST_ASSERT(idx % 2 == 0);
00438
00439 if (idx >= N32) {
00440 gen_rand_all();
00441 idx = 0;
00442 }
00443 #if defined(BIG_ENDIAN64) && !defined(ONLY64)
00444 r1 = psfmt32[idx];
00445 r2 = psfmt32[idx + 1];
00446 idx += 2;
00447 return ((uint64_t)r2 << 32) | r1;
00448 #else
00449 r = psfmt64[idx / 2];
00450 idx += 2;
00451 return r;
00452 #endif
00453 }
00454
00455 #ifndef ONLY64
00456
00481 void fill_array32(uint32_t *array, int size) {
00482 BOOST_ASSERT(initialized);
00483 BOOST_ASSERT(idx == N32);
00484 BOOST_ASSERT(size % 4 == 0);
00485 BOOST_ASSERT(size >= N32);
00486
00487 gen_rand_array((w128_t *)array, size / 4);
00488 idx = N32;
00489 }
00490 #endif
00491
00517 void fill_array64(uint64_t *array, int size) {
00518 BOOST_ASSERT(initialized);
00519 BOOST_ASSERT(idx == N32);
00520 BOOST_ASSERT(size % 2 == 0);
00521 BOOST_ASSERT(size >= N64);
00522
00523 gen_rand_array((w128_t *)array, size / 2);
00524 idx = N32;
00525
00526 #if defined(BIG_ENDIAN64) && !defined(ONLY64)
00527 swap((w128_t *)array, size /2);
00528 #endif
00529 }
00530
00537 void init_gen_rand(uint32_t seed) {
00538 int i;
00539
00540 psfmt32[idxof(0)] = seed;
00541 for (i = 1; i < N32; i++) {
00542 psfmt32[idxof(i)] = 1812433253UL * (psfmt32[idxof(i - 1)]
00543 ^ (psfmt32[idxof(i - 1)] >> 30))
00544 + i;
00545 }
00546 idx = N32;
00547 period_certification();
00548 initialized = 1;
00549 }
00550
00557 void init_by_array(uint32_t *init_key, int key_length) {
00558 int i, j, count;
00559 uint32_t r;
00560 int lag;
00561 int mid;
00562 int size = N * 4;
00563
00564 if (size >= 623) {
00565 lag = 11;
00566 } else if (size >= 68) {
00567 lag = 7;
00568 } else if (size >= 39) {
00569 lag = 5;
00570 } else {
00571 lag = 3;
00572 }
00573 mid = (size - lag) / 2;
00574
00575 memset(sfmt, 0x8b, sizeof(sfmt));
00576 if (key_length + 1 > N32) {
00577 count = key_length + 1;
00578 } else {
00579 count = N32;
00580 }
00581 r = func1(psfmt32[idxof(0)] ^ psfmt32[idxof(mid)]
00582 ^ psfmt32[idxof(N32 - 1)]);
00583 psfmt32[idxof(mid)] += r;
00584 r += key_length;
00585 psfmt32[idxof(mid + lag)] += r;
00586 psfmt32[idxof(0)] = r;
00587
00588 count--;
00589 for (i = 1, j = 0; (j < count) && (j < key_length); j++) {
00590 r = func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % N32)]
00591 ^ psfmt32[idxof((i + N32 - 1) % N32)]);
00592 psfmt32[idxof((i + mid) % N32)] += r;
00593 r += init_key[j] + i;
00594 psfmt32[idxof((i + mid + lag) % N32)] += r;
00595 psfmt32[idxof(i)] = r;
00596 i = (i + 1) % N32;
00597 }
00598 for (; j < count; j++) {
00599 r = func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % N32)]
00600 ^ psfmt32[idxof((i + N32 - 1) % N32)]);
00601 psfmt32[idxof((i + mid) % N32)] += r;
00602 r += i;
00603 psfmt32[idxof((i + mid + lag) % N32)] += r;
00604 psfmt32[idxof(i)] = r;
00605 i = (i + 1) % N32;
00606 }
00607 for (j = 0; j < N32; j++) {
00608 r = func2(psfmt32[idxof(i)] + psfmt32[idxof((i + mid) % N32)]
00609 + psfmt32[idxof((i + N32 - 1) % N32)]);
00610 psfmt32[idxof((i + mid) % N32)] ^= r;
00611 r -= i;
00612 psfmt32[idxof((i + mid + lag) % N32)] ^= r;
00613 psfmt32[idxof(i)] = r;
00614 i = (i + 1) % N32;
00615 }
00616
00617 idx = N32;
00618 period_certification();
00619 initialized = 1;
00620 }