UTF-8 Decoding & Encoding |
返信 |
utf-8 encode decode C C++ programming |
// U-00000000 - U-0000007F: 0xxxxxxx
// U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
// U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
// U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
// U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
// U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
byte GetUTF8ByteCount(byte first) { if (first < 0x80) return 1; else if (first < 0xE0) return 2; else if (first < 0xF0) return 3; else if (first < 0xF8) return 4; else if (first < 0xFC) return 5; else return 6; } wchar_t DecodeUTF8(byte first, byte second, byte third) { switch( GetUTF8ByteCount(first) ) { case 1: return first; case 2: if (second) return ((first & 0x1F) << 6) | (second & 0x3F); case 3: if (second && third) return ((first & 0x0F) << 12) | ((second & 0x3F) << 6) | third & 0x3F; } return NULL; } std::vector<byte> EncodeUTF8(wchar_t wideChar) { std::vector<byte> result; result.reserve(3); if (InRange(wideChar, 0x00, 0x7F)) result.push_back( wideChar ); else if (InRange(wideChar, 0x80, 0x07FF)) { result.push_back( (wideChar >> 5) & 0x1F | 0xC0 ); result.push_back( wideChar & 0x1F | 0x80 ); } else if (InRange(wideChar, 0x0800, 0xFFFF)) { result.push_back( (wideChar >> 10) & 0x0F | 0xE0 ); result.push_back( (wideChar >> 5) & 0x1F | 0x80 ); result.push_back( wideChar & 0x1F | 0x80 ); } return result; }
-- by Sizuha
投稿者 v6ktw2 | 返信 (0) | トラックバック (0)