/*
 * XConvertUtf8ToUcs: decode the multi-byte sequence starting at buf and
 * store the resulting code point through ucs.
 *
 * What the visible lines establish:
 *   - every trailing byte must satisfy (byte & 0xC0) == 0x80;
 *   - the marker bits of each byte are masked off and the remaining bits
 *     are shifted and summed into *ucs;
 *   - the sequence length (5, 4, 3 or 2) is returned only when the decoded
 *     value passes the range test that follows each sum;
 *   - *ucs is set to '?' on a path whose guarding condition is not
 *     visible here.
 *
 * NOTE(review): this listing has original line numbers fused onto the
 * start of many lines and omits several lines (return type, remaining
 * parameters, the first part of each `&&` condition, the last term of
 * each sum, and whatever follows the '?' fallback).  Restore the function
 * from a pristine copy of the file before compiling.
 */
32XConvertUtf8ToUcs(
const unsigned char *buf,
48 && (buf[1] & 0xC0) == 0x80
49 && (buf[2] & 0xC0) == 0x80
50 && (buf[3] & 0xC0) == 0x80
51 && (buf[4] & 0xC0) == 0x80) {
/* 5-byte form: four trailing bytes checked above. */
53 *ucs = ((buf[0] & ~0xF8) << 24) +
54 ((buf[1] & ~0x80) << 18) +
55 ((buf[2] & ~0x80) << 12) +
56 ((buf[3] & ~0x80) << 6) +
/* Accept only values above the 21 bits a 4-byte form can carry (0x1FFFFF)
 * and below 0x01000000. */
58 if (*ucs > 0x001FFFFF && *ucs < 0x01000000)
return 5;
61 && (buf[1] & 0xC0) == 0x80
62 && (buf[2] & 0xC0) == 0x80
63 && (buf[3] & 0xC0) == 0x80) {
/* 4-byte form: three trailing bytes checked above. */
65 *ucs = ((buf[0] & ~0xF0) << 18) +
66 ((buf[1] & ~0x80) << 12) +
67 ((buf[2] & ~0x80) << 6) +
/* Accept only values above the 16 bits a 3-byte form can carry. */
69 if (*ucs > 0x0000FFFF)
return 4;
72 && (buf[1] & 0xC0) == 0x80
73 && (buf[2] & 0xC0) == 0x80) {
/* 3-byte form: two trailing bytes checked above. */
75 *ucs = ((buf[0] & ~0xE0) << 12) +
76 ((buf[1] & ~0x80) << 6) +
/* Accept only values above the 11 bits a 2-byte form can carry. */
78 if (*ucs > 0x000007FF)
return 3;
80 }
else if (len > 1 && (buf[1] & 0xC0) == 0x80) {
/* 2-byte form: needs at least two bytes and one valid trailing byte. */
82 *ucs = ((buf[0] & ~0xC0) << 6) +
/* Accept only values above the 7 bits a single byte can carry. */
84 if (*ucs > 0x0000007F)
return 2;
/* Fallback: substitute '?' for the code point. */
93 *ucs = (
unsigned int)
'?';
/*
 * XConvertUcsToUtf8: write the multi-byte encoding of ucs into buf.
 *
 * The visible branches select the sequence length from the magnitude of
 * ucs (< 0x80, < 0x800, < 0x10000, < 0x200000, < 0x1000000).  Each
 * multi-byte branch stores a lead byte carrying the length marker and the
 * top bits, followed by trailing bytes of the form 0x80 | (6 bits).
 *
 * NOTE(review): this listing has original line numbers fused onto the
 * start of many lines and omits several lines (return type, the buf
 * parameter, the body of the < 0x80 branch, the statement that ends each
 * branch, and everything after the last visible store).  Restore the
 * function from a pristine copy of the file before compiling.
 */
102XConvertUcsToUtf8(
unsigned int ucs,
105 if (ucs < 0x000080) {
108 }
else if (ucs < 0x000800) {
/* 2 bytes: 5 bits in the lead byte, 6 in the trailing byte. */
109 buf[0] = 0xC0 | (ucs >> 6);
110 buf[1] = 0x80 | (ucs & 0x3F);
112 }
else if (ucs < 0x010000) {
/* 3 bytes: 4 + 6 + 6 bits. */
113 buf[0] = 0xE0 | (ucs >> 12);
114 buf[1] = 0x80 | ((ucs >> 6) & 0x3F);
115 buf[2] = 0x80 | (ucs & 0x3F);
117 }
else if (ucs < 0x00200000) {
/* 4 bytes: 3 + 6 + 6 + 6 bits. */
118 buf[0] = 0xF0 | (ucs >> 18);
119 buf[1] = 0x80 | ((ucs >> 12) & 0x3F);
120 buf[2] = 0x80 | ((ucs >> 6) & 0x3F);
121 buf[3] = 0x80 | (ucs & 0x3F);
123 }
else if (ucs < 0x01000000) {
/* 5 bytes.  Because ucs < 0x01000000 here, ucs >> 24 is always 0, so the
 * lead byte is always 0xF8 and all payload bits sit in the trailing bytes. */
124 buf[0] = 0xF8 | (ucs >> 24);
125 buf[1] = 0x80 | ((ucs >> 18) & 0x3F);
126 buf[2] = 0x80 | ((ucs >> 12) & 0x3F);
127 buf[3] = 0x80 | ((ucs >> 6) & 0x3F);
128 buf[4] = 0x80 | (ucs & 0x3F);
/*
 * XUtf8CharByteLen: return whatever XConvertUtf8ToUcs returns for the
 * sequence at buf; the decoded value written to ucs is not used by the
 * visible line.
 *
 * NOTE(review): the return type, the len parameter, the declaration of
 * ucs and the braces are missing from this listing, and original line
 * numbers are fused onto the start of some lines.
 */
140XUtf8CharByteLen(
const unsigned char *buf,
143 return XConvertUtf8ToUcs(buf, len, &ucs);
/*
 * XCountUtf8Char: the only visible statement asks XUtf8CharByteLen for the
 * byte length of the sequence at offset i, passing the remaining len - i
 * bytes, and stores it in cl.
 *
 * NOTE(review): presumably cl advances i inside a counting loop, but the
 * loop, the counter and the return are not visible in this listing --
 * confirm against a pristine copy of the file.  Original line numbers are
 * fused onto the start of some lines.
 */
150XCountUtf8Char(
const unsigned char *buf,
156 int cl = XUtf8CharByteLen(buf + i, len - i);
/*
 * XFastConvertUtf8ToUcs: decode the multi-byte sequence at buf into *ucs.
 *
 * The visible branch conditions test only len (> 4, > 3, > 2, > 1, > 0);
 * no test of the trailing bytes appears in the lines shown.  Each branch
 * masks the marker bits off the bytes and sums the shifted payload bits
 * into *ucs; '?' is stored on a path whose condition is not fully visible.
 *
 * NOTE(review): this listing has original line numbers fused onto the
 * start of many lines and omits several lines (return type, remaining
 * parameters, the test that selects each form, the last term of each sum,
 * the return statements, and the end of the function).  Restore the
 * function from a pristine copy of the file before compiling.
 */
168XFastConvertUtf8ToUcs(
const unsigned char *buf,
183 }
else if (len > 4) {
/* 5-byte form. */
185 *ucs = ((buf[0] & ~0xF8) << 24) +
186 ((buf[1] & ~0x80) << 18) +
187 ((buf[2] & ~0x80) << 12) +
188 ((buf[3] & ~0x80) << 6) +
192 }
else if (len > 3) {
/* 4-byte form. */
194 *ucs = ((buf[0] & ~0xF0) << 18) +
195 ((buf[1] & ~0x80) << 12) +
196 ((buf[2] & ~0x80) << 6) +
200 }
else if (len > 2) {
/* 3-byte form. */
202 *ucs = ((buf[0] & ~0xE0) << 12) +
203 ((buf[1] & ~0x80) << 6) +
207 }
else if (len > 1) {
/* 2-byte form. */
209 *ucs = ((buf[0] & ~0xC0) << 6) +
214 }
else if (len > 0) {
/* Substitute '?' for the code point. */
220 *ucs = (
unsigned int)
'?';