1 #ifndef RAPIDJSON_ENCODINGS_H_
2 #define RAPIDJSON_ENCODINGS_H_
8 RAPIDJSON_DIAG_OFF(4244)
9 RAPIDJSON_DIAG_OFF(4702)
10 #elif defined(__GNUC__)
12 RAPIDJSON_DIAG_OFF(effc++)
32 template<typename OutputStream>
33 static void Encode(OutputStream& os, unsigned codepoint);
39 template <typename InputStream>
40 static bool Decode(InputStream& is, unsigned* codepoint);
47 template <typename InputStream, typename OutputStream>
48 static bool Validate(InputStream& is, OutputStream& os);
50 // The following functions deal with byte streams.
53 template <typename InputByteStream>
54 static CharType TakeBOM(InputByteStream& is);
57 template <typename InputByteStream>
58 static Ch Take(InputByteStream& is);
61 template <typename OutputByteStream>
62 static void PutBOM(OutputByteStream& os);
65 template <typename OutputByteStream>
66 static void Put(OutputByteStream& os, Ch c);
80 template<
typename CharType =
char>
84 enum { supportUnicode = 1 };
// Writes `codepoint` to `os` as a sequence of UTF-8 style bytes using os.Put().
//   codepoint <= 0x7F   : one byte (the value itself)
//   codepoint <= 0x7FF  : two bytes   (0xC0 lead + one 0x80 continuation)
//   codepoint <= 0xFFFF : three bytes (0xE0 lead + two 0x80 continuations)
// The last group of four Put() calls emits a 0xF0 lead byte and three 0x80 continuation
// bytes; the branch that guards it is not visible in this excerpt.
// Each continuation byte carries 6 payload bits (`& 0x3F`).
86 template<
typename OutputStream>
87 static void Encode(OutputStream& os,
unsigned codepoint) {
88 if (codepoint <= 0x7F)
89 os.Put(static_cast<Ch>(codepoint & 0xFF));
90 else if (codepoint <= 0x7FF) {
91 os.Put(static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF)));
92 os.Put(static_cast<Ch>(0x80 | ((codepoint & 0x3F))));
94 else if (codepoint <= 0xFFFF) {
95 os.Put(static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF)));
96 os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
97 os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F)));
101 os.Put(static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF)));
102 os.Put(static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F)));
103 os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
104 os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F)));
// Reads one multi-byte sequence from `is` and stores the decoded value in `*codepoint`.
// Returns `result` for the handled lead-byte classes and false for any other class.
// NOTE(review): the declarations of `c` and `result`, the first Take(), the switch header and
// the closing braces are not visible in this excerpt.
108 template <
typename InputStream>
109 static bool Decode(InputStream& is,
unsigned* codepoint) {
// COPY():  take the next unit into `c` and append its low 6 bits to *codepoint.
// TRANS(m): clear `result` unless GetRange(c) has at least one bit of mask `m` set.
// TAIL():  COPY() followed by TRANS(0x70).
110 #define COPY() c = is.Take(); *codepoint = (*codepoint << 6) | ((unsigned char)c & 0x3Fu)
111 #define TRANS(mask) result &= ((GetRange((unsigned char)c) & mask) != 0)
112 #define TAIL() COPY(); TRANS(0x70)
115 *codepoint = (
unsigned char)c;
// `type` is the GetRange() class of the lead byte; `0xFF >> type` masks the lead byte
// before it becomes the initial value of *codepoint.
119 unsigned char type = GetRange((
unsigned char)c);
120 *codepoint = (0xFF >> type) & (
unsigned char)c;
// Each case consumes the trailing units for one lead-byte class; the narrower masks
// (0x50, 0x10, 0x20, 0x60) restrict which classes the first trailing unit may belong to.
123 case 2: TAIL();
return result;
124 case 3: TAIL(); TAIL();
return result;
125 case 4: COPY(); TRANS(0x50); TAIL();
return result;
126 case 5: COPY(); TRANS(0x10); TAIL(); TAIL();
return result;
127 case 6: TAIL(); TAIL(); TAIL();
return result;
128 case 10: COPY(); TRANS(0x20); TAIL();
return result;
129 case 11: COPY(); TRANS(0x60); TAIL(); TAIL();
return result;
130 default:
return false;
// Copies one multi-byte sequence from `is` to `os` while checking the class of every unit.
// Returns `result` for the handled lead-byte classes and false for any other class.
// NOTE(review): the declarations of `c` and `result`, the handling before the switch and the
// closing braces are not visible in this excerpt.
137 template <
typename InputStream,
typename OutputStream>
138 static bool Validate(InputStream& is, OutputStream& os) {
// COPY():  take the next unit into `c` and write it to `os` unchanged.
// TRANS(m): clear `result` unless GetRange(c) has at least one bit of mask `m` set.
// TAIL():  COPY() followed by TRANS(0x70).
139 #define COPY() os.Put(c = is.Take())
140 #define TRANS(mask) result &= ((GetRange((unsigned char)c) & mask) != 0)
141 #define TAIL() COPY(); TRANS(0x70)
// Dispatch on the class of the unit currently held in `c`.
148 switch (GetRange((
unsigned char)c)) {
149 case 2: TAIL();
return result;
150 case 3: TAIL(); TAIL();
return result;
151 case 4: COPY(); TRANS(0x50); TAIL();
return result;
152 case 5: COPY(); TRANS(0x10); TAIL(); TAIL();
return result;
153 case 6: TAIL(); TAIL(); TAIL();
return result;
154 case 10: COPY(); TRANS(0x20); TAIL();
return result;
155 case 11: COPY(); TRANS(0x60); TAIL(); TAIL();
return result;
156 default:
return false;
// Classifies a byte through a static lookup table. The ten visible rows hold 256 entries:
//   0x00-0x7F -> 0
//   0x80-0x8F -> 0x10, 0x90-0x9F -> 0x40, 0xA0-0xBF -> 0x20  (bit flags tested by TRANS(mask))
//   0xC0-0xC1 -> 8,    0xC2-0xDF -> 2
//   0xE0 -> 10, 0xE1-0xEC -> 3, 0xED -> 4, 0xEE-0xEF -> 3
//   0xF0 -> 11, 0xF1-0xF3 -> 6, 0xF4 -> 5, 0xF5-0xFF -> 8
// Class 8 has no `case` in the Validate switch, so such bytes reach its `default: return false`.
// NOTE(review): the end of the table and the return statement are not visible in this excerpt;
// presumably the function returns type[c] - confirm.
163 static unsigned char GetRange(
unsigned char c) {
166 static const unsigned char type[] = {
167 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
168 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
169 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
170 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
171 0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
172 0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
173 0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,
174 0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,
175 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
176 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
// Checks successive units against the byte sequence 0xEF, 0xBB, 0xBF and returns the
// current unit `c` as soon as one of them does not match.
// NOTE(review): the statements that read `c` between the checks and the final return are
// not visible in this excerpt.
181 template <
typename InputByteStream>
182 static CharType TakeBOM(InputByteStream& is) {
185 if ((
unsigned char)c != 0xEFu)
return c;
187 if ((
unsigned char)c != 0xBBu)
return c;
189 if ((
unsigned char)c != 0xBFu)
return c;
194 template <
typename InputByteStream>
195 static Ch Take(InputByteStream& is) {
// Writes the three-byte mark 0xEF 0xBB 0xBF to `os`.
200 template <
typename OutputByteStream>
201 static void PutBOM(OutputByteStream& os) {
203 os.Put(0xEFu); os.Put(0xBBu); os.Put(0xBFu);
// Writes the single unit `c` to `os`, converted to the stream's own character type.
206 template <
typename OutputByteStream>
207 static void Put(OutputByteStream& os, Ch c) {
209 os.Put(static_cast<typename OutputByteStream::Ch>(c));
225 template<
typename CharType =
wchar_t>
228 RAPIDJSON_STATIC_ASSERT(
sizeof(Ch) >= 2);
230 enum { supportUnicode = 1 };
// Writes `codepoint` to `os` as 16-bit code units; OutputStream::Ch must be at least 2 bytes.
//   codepoint <= 0xFFFF : written as a single unit.
//   second visible group: subtracts 0x10000 and writes two units, 0xD800 | (top 10 bits)
//   followed by 0xDC00 | (low 10 bits). Its guarding branch is not visible in this excerpt.
232 template<
typename OutputStream>
233 static void Encode(OutputStream& os,
unsigned codepoint) {
234 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename OutputStream::Ch) >= 2);
235 if (codepoint <= 0xFFFF) {
237 os.Put(static_cast<typename OutputStream::Ch>(codepoint));
241 unsigned v = codepoint - 0x10000;
242 os.Put(static_cast<typename OutputStream::Ch>((v >> 10) | 0xD800));
// Cast explicitly, like the two Put() calls above, so the unsigned expression is not
// implicitly narrowed to the stream's character type.
243 os.Put(static_cast<typename OutputStream::Ch>((v & 0x3FF) | 0xDC00));
// Decodes one code point from 16-bit units; InputStream::Ch must be at least 2 bytes.
//   c outside [0xD800, 0xDFFF] : handled by the first branch (body not visible here).
//   c <= 0xDBFF                : (c & 0x3FF) << 10 becomes the high part of *codepoint, the low
//                                10 bits of `c` are OR-ed in, 0x10000 is added, and the result is
//                                whether `c` lies in [0xDC00, 0xDFFF].
// NOTE(review): the declaration of `c` and the Take() calls are not visible in this excerpt;
// presumably a second unit is read between the two assignments - confirm.
247 template <
typename InputStream>
248 static bool Decode(InputStream& is,
unsigned* codepoint) {
249 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename InputStream::Ch) >= 2);
251 if (c < 0xD800 || c > 0xDFFF) {
255 else if (c <= 0xDBFF) {
256 *codepoint = (c & 0x3FF) << 10;
258 *codepoint |= (c & 0x3FF);
259 *codepoint += 0x10000;
260 return c >= 0xDC00 && c <= 0xDFFF;
// Copies one code point from `is` to `os` as 16-bit units; both streams' Ch must be >= 2 bytes.
// The first unit is always copied. If it is at most 0xDBFF (and inside the 0xD800-0xDFFF range),
// a second unit is copied and the result is whether that unit lies in [0xDC00, 0xDFFF].
// NOTE(review): the declaration of `c` and the statements for the other outcomes are not
// visible in this excerpt.
265 template <
typename InputStream,
typename OutputStream>
266 static bool Validate(InputStream& is, OutputStream& os) {
267 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename InputStream::Ch) >= 2);
268 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename OutputStream::Ch) >= 2);
270 os.Put(c = is.Take());
271 if (c < 0xD800 || c > 0xDFFF)
273 else if (c <= 0xDBFF) {
274 os.Put(c = is.Take());
275 return c >= 0xDC00 && c <= 0xDFFF;
282 template<
typename CharType =
wchar_t>
// Reads one unit with Take(); if its low 16 bits equal 0xFEFF, that unit is dropped and the
// next unit is returned instead, otherwise the unit itself is returned.
// Requires a byte-oriented input stream (sizeof(Ch) == 1).
284 template <
typename InputByteStream>
285 static CharType TakeBOM(InputByteStream& is) {
286 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename InputByteStream::Ch) == 1);
287 CharType c = Take(is);
288 return (
unsigned short)c == 0xFEFFu ? Take(is) : c;
// Assembles one 16-bit unit from two bytes of `is`: the first byte is the low 8 bits and
// the second byte is the high 8 bits. Requires a byte-oriented input stream.
// NOTE(review): the return statement is not visible in this excerpt.
291 template <
typename InputByteStream>
292 static CharType Take(InputByteStream& is) {
293 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename InputByteStream::Ch) == 1);
294 CharType c = (
unsigned char)is.Take();
295 c |= (
unsigned char)is.Take() << 8;
// Writes the two-byte mark 0xFF 0xFE to a byte-oriented output stream.
299 template <
typename OutputByteStream>
300 static void PutBOM(OutputByteStream& os) {
301 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename OutputByteStream::Ch) == 1);
302 os.Put(0xFFu); os.Put(0xFEu);
// Writes the unit `c` to a byte-oriented output stream. The visible statement emits
// bits 8-15 of `c`.
// NOTE(review): the statement before it is not visible in this excerpt; presumably it
// emits the low byte first - confirm.
305 template <
typename OutputByteStream>
306 static void Put(OutputByteStream& os, CharType c) {
307 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename OutputByteStream::Ch) == 1);
309 os.Put((c >> 8) & 0xFFu);
314 template<
typename CharType =
wchar_t>
// Reads one unit with Take(); if its low 16 bits equal 0xFEFF, that unit is dropped and the
// next unit is returned instead, otherwise the unit itself is returned.
// Requires a byte-oriented input stream (sizeof(Ch) == 1).
316 template <
typename InputByteStream>
317 static CharType TakeBOM(InputByteStream& is) {
318 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename InputByteStream::Ch) == 1);
319 CharType c = Take(is);
320 return (
unsigned short)c == 0xFEFFu ? Take(is) : c;
// Assembles one 16-bit unit from two bytes of `is`: the first byte is the high 8 bits and
// the second byte is the low 8 bits. Requires a byte-oriented input stream.
// NOTE(review): the return statement is not visible in this excerpt.
323 template <
typename InputByteStream>
324 static CharType Take(InputByteStream& is) {
325 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename InputByteStream::Ch) == 1);
326 CharType c = (
unsigned char)is.Take() << 8;
327 c |= (
unsigned char)is.Take();
// Writes the two-byte mark 0xFE 0xFF to a byte-oriented output stream.
331 template <
typename OutputByteStream>
332 static void PutBOM(OutputByteStream& os) {
333 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename OutputByteStream::Ch) == 1);
334 os.Put(0xFEu); os.Put(0xFFu);
// Writes the unit `c` to a byte-oriented output stream. The visible statement emits
// bits 8-15 of `c` first.
// NOTE(review): the statement after it is not visible in this excerpt; presumably it
// emits the low byte - confirm.
337 template <
typename OutputByteStream>
338 static void Put(OutputByteStream& os, CharType c) {
339 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename OutputByteStream::Ch) == 1);
340 os.Put((c >> 8) & 0xFFu);
356 template<
typename CharType =
unsigned>
359 RAPIDJSON_STATIC_ASSERT(
sizeof(Ch) >= 4);
361 enum { supportUnicode = 1 };
// Writes `codepoint` to `os`; the output stream's character type must be at least 4 bytes wide.
// NOTE(review): the statements that perform the write are not visible in this excerpt.
363 template<
typename OutputStream>
364 static void Encode(OutputStream& os,
unsigned codepoint) {
365 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename OutputStream::Ch) >= 4);
// Decodes one unit from a stream whose character type is at least 4 bytes wide and reports
// whether the unit `c` is no greater than 0x10FFFF.
// NOTE(review): the statements that read `c` and store *codepoint are not visible here.
370 template <
typename InputStream>
371 static bool Decode(InputStream& is,
unsigned* codepoint) {
372 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename InputStream::Ch) >= 4);
375 return c <= 0x10FFFF;
// Copies one unit from `is` to `os` and reports whether it is no greater than 0x10FFFF.
// The input stream's character type must be at least 4 bytes wide.
// NOTE(review): the declaration of `c` is not visible in this excerpt.
378 template <
typename InputStream,
typename OutputStream>
379 static bool Validate(InputStream& is, OutputStream& os) {
380 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename InputStream::Ch) >= 4);
382 os.Put(c = is.Take());
383 return c <= 0x10FFFF;
388 template<
typename CharType =
unsigned>
// Reads one unit with Take(); if it equals 0x0000FEFF, that unit is dropped and the next
// unit is returned instead, otherwise the unit itself is returned.
// Requires a byte-oriented input stream (sizeof(Ch) == 1).
390 template <
typename InputByteStream>
391 static CharType TakeBOM(InputByteStream& is) {
392 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename InputByteStream::Ch) == 1);
393 CharType c = Take(is);
394 return (
unsigned)c == 0x0000FEFFu ? Take(is) : c;
// Assembles one 32-bit unit from four bytes of `is`, least significant byte first.
// Requires a byte-oriented input stream (sizeof(Ch) == 1).
// NOTE(review): the return statement is not visible in this excerpt.
397 template <
typename InputByteStream>
398 static CharType Take(InputByteStream& is) {
399 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename InputByteStream::Ch) == 1);
400 CharType c = static_cast<unsigned char>(is.Take());
// Widen every byte to `unsigned` before shifting. A bare `unsigned char` operand is
// promoted to signed int, so shifting a byte >= 0x80 left by 24 overflows into the sign
// bit, and the negative result would sign-extend if CharType is wider than int.
401 c |= static_cast<unsigned>(static_cast<unsigned char>(is.Take())) << 8;
402 c |= static_cast<unsigned>(static_cast<unsigned char>(is.Take())) << 16;
403 c |= static_cast<unsigned>(static_cast<unsigned char>(is.Take())) << 24;
// Writes the four-byte mark 0xFF 0xFE 0x00 0x00 to a byte-oriented output stream.
407 template <
typename OutputByteStream>
408 static void PutBOM(OutputByteStream& os) {
409 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename OutputByteStream::Ch) == 1);
410 os.Put(0xFFu); os.Put(0xFEu); os.Put(0x00u); os.Put(0x00u);
// Writes the unit `c` to a byte-oriented output stream. The visible statements emit
// bits 8-15, then bits 16-23, then bits 24-31 of `c`.
// NOTE(review): the statement before them is not visible in this excerpt; presumably it
// emits the low byte first - confirm.
413 template <
typename OutputByteStream>
414 static void Put(OutputByteStream& os, CharType c) {
415 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename OutputByteStream::Ch) == 1);
417 os.Put((c >> 8) & 0xFFu);
418 os.Put((c >> 16) & 0xFFu);
419 os.Put((c >> 24) & 0xFFu);
424 template<
typename CharType =
unsigned>
// Reads one unit with Take(); if it equals 0x0000FEFF, that unit is dropped and the next
// unit is returned instead, otherwise the unit itself is returned.
// Requires a byte-oriented input stream (sizeof(Ch) == 1).
426 template <
typename InputByteStream>
427 static CharType TakeBOM(InputByteStream& is) {
428 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename InputByteStream::Ch) == 1);
429 CharType c = Take(is);
430 return (
unsigned)c == 0x0000FEFFu ? Take(is) : c;
// Assembles one 32-bit unit from four bytes of `is`, most significant byte first.
// Requires a byte-oriented input stream (sizeof(Ch) == 1).
// NOTE(review): the return statement is not visible in this excerpt.
433 template <
typename InputByteStream>
434 static CharType Take(InputByteStream& is) {
435 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename InputByteStream::Ch) == 1);
// Widen every byte to `unsigned` before shifting. A bare `unsigned char` operand is
// promoted to signed int, so shifting a byte >= 0x80 left by 24 overflows into the sign
// bit, and the negative result would sign-extend if CharType is wider than int.
436 CharType c = static_cast<unsigned>(static_cast<unsigned char>(is.Take())) << 24;
437 c |= static_cast<unsigned>(static_cast<unsigned char>(is.Take())) << 16;
438 c |= static_cast<unsigned>(static_cast<unsigned char>(is.Take())) << 8;
439 c |= static_cast<unsigned>(static_cast<unsigned char>(is.Take()));
// Writes the four-byte mark 0x00 0x00 0xFE 0xFF to a byte-oriented output stream.
443 template <
typename OutputByteStream>
444 static void PutBOM(OutputByteStream& os) {
445 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename OutputByteStream::Ch) == 1);
446 os.Put(0x00u); os.Put(0x00u); os.Put(0xFEu); os.Put(0xFFu);
// Writes the unit `c` to a byte-oriented output stream. The visible statements emit
// bits 24-31, then bits 16-23, then bits 8-15 of `c`.
// NOTE(review): the statement after them is not visible in this excerpt; presumably it
// emits the low byte last - confirm.
449 template <
typename OutputByteStream>
450 static void Put(OutputByteStream& os, CharType c) {
451 RAPIDJSON_STATIC_ASSERT(
sizeof(
typename OutputByteStream::Ch) == 1);
452 os.Put((c >> 24) & 0xFFu);
453 os.Put((c >> 16) & 0xFFu);
454 os.Put((c >> 8) & 0xFFu);
467 template<
typename CharType =
char>
471 enum { supportUnicode = 0 };
// Writes the low 8 bits of `codepoint` to `os` as a single unit of type Ch.
// NOTE(review): any range check on `codepoint` before the write is not visible in this excerpt.
473 template<
typename OutputStream>
474 static void Encode(OutputStream& os,
unsigned codepoint) {
476 os.Put(static_cast<Ch>(codepoint & 0xFF));
// Reads one unit from `is`, converted to unsigned char.
// NOTE(review): the statements that store *codepoint and compute the return value are not
// visible in this excerpt.
479 template <
typename InputStream>
480 static bool Decode(InputStream& is,
unsigned* codepoint) {
481 unsigned char c =
static_cast<unsigned char>(is.Take());
// Reads one unit from `is` into an unsigned char (implicit conversion).
// NOTE(review): the statements that write to `os` and compute the return value are not
// visible in this excerpt.
486 template <
typename InputStream,
typename OutputStream>
487 static bool Validate(InputStream& is, OutputStream& os) {
488 unsigned char c = is.Take();
493 template <
typename InputByteStream>
494 static CharType TakeBOM(InputByteStream& is) {
500 template <
typename InputByteStream>
501 static Ch Take(InputByteStream& is) {
506 template <
typename OutputByteStream>
507 static void PutBOM(OutputByteStream& os) {
// Writes the single unit `c` to `os`, converted to the stream's own character type.
512 template <
typename OutputByteStream>
513 static void Put(OutputByteStream& os, Ch c) {
515 os.Put(static_cast<typename OutputByteStream::Ch>(c));
534 template<
typename CharType>
538 enum { supportUnicode = 1 };
540 #define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x
// Runtime-dispatched Encode: builds a static table of function pointers from
// RAPIDJSON_ENCODINGS_FUNC(Encode) (UTF8, UTF16LE, UTF16BE, UTF32LE, UTF32BE, in that order)
// and calls the entry selected by os.GetType(). No bounds check on the index is visible here.
542 template<
typename OutputStream>
543 RAPIDJSON_FORCEINLINE
static void Encode(OutputStream& os,
unsigned codepoint) {
544 typedef void (*EncodeFunc)(OutputStream&, unsigned);
545 static const EncodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Encode) };
546 (*f[os.GetType()])(os, codepoint);
// Runtime-dispatched Decode: builds a static table of function pointers from
// RAPIDJSON_ENCODINGS_FUNC(Decode) (UTF8, UTF16LE, UTF16BE, UTF32LE, UTF32BE, in that order)
// and returns the result of the entry selected by is.GetType().
// No bounds check on the index is visible here.
549 template <
typename InputStream>
550 RAPIDJSON_FORCEINLINE
static bool Decode(InputStream& is,
unsigned* codepoint) {
551 typedef bool (*DecodeFunc)(InputStream&,
unsigned*);
552 static const DecodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Decode) };
553 return (*f[is.GetType()])(is, codepoint);
// Runtime-dispatched Validate: builds a static table of function pointers from
// RAPIDJSON_ENCODINGS_FUNC(Validate) (UTF8, UTF16LE, UTF16BE, UTF32LE, UTF32BE, in that order)
// and returns the result of the entry selected by the input stream's GetType().
// No bounds check on the index is visible here.
556 template <
typename InputStream,
typename OutputStream>
557 RAPIDJSON_FORCEINLINE
static bool Validate(InputStream& is, OutputStream& os) {
558 typedef bool (*ValidateFunc)(InputStream&, OutputStream&);
559 static const ValidateFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Validate) };
560 return (*f[is.GetType()])(is, os);
563 #undef RAPIDJSON_ENCODINGS_FUNC
570 template<
typename SourceEncoding,
typename TargetEncoding>
// Converts one code point: decodes it from `is` with SourceEncoding::Decode and writes it
// to `os` with TargetEncoding::Encode.
// NOTE(review): the declaration of `codepoint`, the statement taken when decoding fails
// (presumably `return false`) and the final return are not visible in this excerpt.
573 template<
typename InputStream,
typename OutputStream>
574 RAPIDJSON_FORCEINLINE
static bool Transcode(InputStream& is, OutputStream& os) {
576 if (!SourceEncoding::Decode(is, &codepoint))
578 TargetEncoding::Encode(os, codepoint);
583 template<
typename InputStream,
typename OutputStream>
584 RAPIDJSON_FORCEINLINE
static bool Validate(InputStream& is, OutputStream& os) {
590 template<
typename Encoding>
592 template<
typename InputStream,
typename OutputStream>
593 RAPIDJSON_FORCEINLINE
static bool Transcode(InputStream& is, OutputStream& os) {
// Forwards directly to Encoding::Validate(is, os) and returns its result.
598 template<
typename InputStream,
typename OutputStream>
599 RAPIDJSON_FORCEINLINE
static bool Validate(InputStream& is, OutputStream& os) {
600 return Encoding::Validate(is, os);
// Closing guard for the compiler-diagnostic section opened at the top of the header, which
// disables MSVC warnings 4244/4702 and GCC's effc++ warning. MSVC predefines _MSC_VER; the
// previous spelling `_MSV_VER` is never defined, so this condition was false on MSVC.
// NOTE(review): the guarded statements are not visible in this excerpt.
606 #if defined(__GNUC__) || defined(_MSC_VER)
610 #endif // RAPIDJSON_ENCODINGS_H_