UTF-8 Decoding & Encoding |
返信 |
utf-8 encode decode C C++ programming | |
// U-00000000 - U-0000007F: 0xxxxxxx
// U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
// U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
// U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
// U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
// U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
// Reports how many bytes a UTF-8 sequence occupies, judged solely from the
// value of its first byte.
//
// first: the lead byte of the sequence.
//
// Returns a length from 1 to 6. The byte is only compared against ascending
// thresholds, so no validation takes place: values 0x80-0xBF are reported as
// 2 and values 0xFC-0xFF are reported as 6.
byte GetUTF8ByteCount(byte first)
{
    // kExclusiveLimit[i] is the smallest byte value that no longer fits a
    // sequence of (i + 1) bytes.
    static const int kExclusiveLimit[] = { 0x80, 0xE0, 0xF0, 0xF8, 0xFC };
    const int kLimitCount = sizeof(kExclusiveLimit) / sizeof(kExclusiveLimit[0]);

    for (int i = 0; i < kLimitCount; ++i)
    {
        if (first < kExclusiveLimit[i])
        {
            return i + 1;
        }
    }

    // Anything at or above the last threshold is treated as a 6-byte lead.
    return 6;
}
// Decodes one UTF-8 sequence of at most three bytes into a wide character.
//
// first:  lead byte; GetUTF8ByteCount(first) selects how many bytes are used.
// second: second byte, read only for two- and three-byte sequences.
// third:  third byte, read only for three-byte sequences.
//
// Returns the decoded character. Returns 0 when the lead byte announces a
// sequence longer than three bytes, or when a trailing byte that the sequence
// needs is zero. A decoded U+0000 is therefore indistinguishable from failure.
// Trailing bytes are not checked for the 10xxxxxx pattern; only their low six
// bits are used.
wchar_t DecodeUTF8(byte first, byte second, byte third)
{
    switch (GetUTF8ByteCount(first))
    {
    case 1:
        return first;

    case 2:
        if (second)
        {
            // 110xxxxx 10xxxxxx -> 5 payload bits + 6 payload bits.
            return static_cast<wchar_t>(((first & 0x1F) << 6) | (second & 0x3F));
        }
        break; // explicit: do not drift into the three-byte case

    case 3:
        if (second && third)
        {
            // 1110xxxx 10xxxxxx 10xxxxxx -> 4 + 6 + 6 payload bits.
            return static_cast<wchar_t>(((first & 0x0F) << 12)
                                        | ((second & 0x3F) << 6)
                                        | (third & 0x3F));
        }
        break;
    }

    // NULL is a null *pointer* constant and may be defined as nullptr, which
    // does not convert to wchar_t; return a character-typed zero instead.
    return L'\0';
}
std::vector<byte> EncodeUTF8(wchar_t wideChar)
{
std::vector<byte> result;
result.reserve(3);
if (InRange(wideChar, 0x00, 0x7F)) result.push_back( wideChar );
else if (InRange(wideChar, 0x80, 0x07FF)) {
result.push_back( (wideChar >> 5) & 0x1F | 0xC0 );
result.push_back( wideChar & 0x1F | 0x80 );
}
else if (InRange(wideChar, 0x0800, 0xFFFF)) {
result.push_back( (wideChar >> 10) & 0x0F | 0xE0 );
result.push_back( (wideChar >> 5) & 0x1F | 0x80 );
result.push_back( wideChar & 0x1F | 0x80 );
}
return result;
}-- by Sizuha
投稿者 v6ktw2 | 返信 (0) | トラックバック (0)