FLTK 1.3.9
Loading...
Searching...
No Matches
utf8.h
1/* $XFree86: xc/lib/X11/lcUniConv/utf8.h,v 1.3 2000/11/28 18:50:07 dawes Exp $ */
2
3/*
4 * UTF-8
5 */
6
7/* Specification: RFC 2279 */
8
9static int
10utf8_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
11{
12 unsigned char c = s[0];
13
14 if (c < 0x80) {
15 *pwc = c;
16 return 1;
17 } else if (c < 0xc2) {
18 return RET_ILSEQ;
19 } else if (c < 0xe0) {
20 if (n < 2)
21 return RET_TOOFEW(0);
22 if (!((s[1] ^ 0x80) < 0x40))
23 return RET_ILSEQ;
24 *pwc = ((ucs4_t) (c & 0x1f) << 6)
25 | (ucs4_t) (s[1] ^ 0x80);
26 return 2;
27 } else if (c < 0xf0) {
28 if (n < 3)
29 return RET_TOOFEW(0);
30 if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
31 && (c >= 0xe1 || s[1] >= 0xa0)))
32 return RET_ILSEQ;
33 *pwc = ((ucs4_t) (c & 0x0f) << 12)
34 | ((ucs4_t) (s[1] ^ 0x80) << 6)
35 | (ucs4_t) (s[2] ^ 0x80);
36 return 3;
37 } else if (c < 0xf8) {
38 if (n < 4)
39 return RET_TOOFEW(0);
40 if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
41 && (s[3] ^ 0x80) < 0x40
42 && (c >= 0xf1 || s[1] >= 0x90)))
43 return RET_ILSEQ;
44 *pwc = ((ucs4_t) (c & 0x07) << 18)
45 | ((ucs4_t) (s[1] ^ 0x80) << 12)
46 | ((ucs4_t) (s[2] ^ 0x80) << 6)
47 | (ucs4_t) (s[3] ^ 0x80);
48 return 4;
49 } else if (c < 0xfc) {
50 if (n < 5)
51 return RET_TOOFEW(0);
52 if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
53 && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
54 && (c >= 0xf9 || s[1] >= 0x88)))
55 return RET_ILSEQ;
56 *pwc = ((ucs4_t) (c & 0x03) << 24)
57 | ((ucs4_t) (s[1] ^ 0x80) << 18)
58 | ((ucs4_t) (s[2] ^ 0x80) << 12)
59 | ((ucs4_t) (s[3] ^ 0x80) << 6)
60 | (ucs4_t) (s[4] ^ 0x80);
61 return 5;
62 } else if (c < 0xfe) {
63 if (n < 6)
64 return RET_TOOFEW(0);
65 if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
66 && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
67 && (s[5] ^ 0x80) < 0x40
68 && (c >= 0xfd || s[1] >= 0x84)))
69 return RET_ILSEQ;
70 *pwc = ((ucs4_t) (c & 0x01) << 30)
71 | ((ucs4_t) (s[1] ^ 0x80) << 24)
72 | ((ucs4_t) (s[2] ^ 0x80) << 18)
73 | ((ucs4_t) (s[3] ^ 0x80) << 12)
74 | ((ucs4_t) (s[4] ^ 0x80) << 6)
75 | (ucs4_t) (s[5] ^ 0x80);
76 return 6;
77 } else
78 return RET_ILSEQ;
79}
80
81static int
82utf8_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n) /* n == 0 is acceptable */
83{
84 int count;
85 if (wc < 0x80)
86 count = 1;
87 else if (wc < 0x800)
88 count = 2;
89 else if (wc < 0x10000)
90 count = 3;
91 else if (wc < 0x200000)
92 count = 4;
93 else if (wc < 0x4000000)
94 count = 5;
95 else if (wc <= 0x7fffffff)
96 count = 6;
97 else
98 return RET_ILSEQ;
99 if (n < count)
100 return RET_TOOSMALL;
101 switch (count) { /* note: code falls through cases! */
102 case 6: r[5] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x4000000;
103 case 5: r[4] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x200000;
104 case 4: r[3] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x10000;
105 case 3: r[2] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x800;
106 case 2: r[1] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0xc0;
107 case 1: r[0] = wc;
108 }
109 return count;
110}