FLTK 1.3.4
utf8.h
1 /* $XFree86: xc/lib/X11/lcUniConv/utf8.h,v 1.3 2000/11/28 18:50:07 dawes Exp $ */
2 
3 /*
4  * UTF-8
5  */
6 
7 /* Specification: RFC 2279 */
8 
9 static int
10 utf8_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
11 {
12  unsigned char c = s[0];
13 
14  if (c < 0x80) {
15  *pwc = c;
16  return 1;
17  } else if (c < 0xc2) {
18  return RET_ILSEQ;
19  } else if (c < 0xe0) {
20  if (n < 2)
21  return RET_TOOFEW(0);
22  if (!((s[1] ^ 0x80) < 0x40))
23  return RET_ILSEQ;
24  *pwc = ((ucs4_t) (c & 0x1f) << 6)
25  | (ucs4_t) (s[1] ^ 0x80);
26  return 2;
27  } else if (c < 0xf0) {
28  if (n < 3)
29  return RET_TOOFEW(0);
30  if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
31  && (c >= 0xe1 || s[1] >= 0xa0)))
32  return RET_ILSEQ;
33  *pwc = ((ucs4_t) (c & 0x0f) << 12)
34  | ((ucs4_t) (s[1] ^ 0x80) << 6)
35  | (ucs4_t) (s[2] ^ 0x80);
36  return 3;
37  } else if (c < 0xf8) {
38  if (n < 4)
39  return RET_TOOFEW(0);
40  if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
41  && (s[3] ^ 0x80) < 0x40
42  && (c >= 0xf1 || s[1] >= 0x90)))
43  return RET_ILSEQ;
44  *pwc = ((ucs4_t) (c & 0x07) << 18)
45  | ((ucs4_t) (s[1] ^ 0x80) << 12)
46  | ((ucs4_t) (s[2] ^ 0x80) << 6)
47  | (ucs4_t) (s[3] ^ 0x80);
48  return 4;
49  } else if (c < 0xfc) {
50  if (n < 5)
51  return RET_TOOFEW(0);
52  if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
53  && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
54  && (c >= 0xf9 || s[1] >= 0x88)))
55  return RET_ILSEQ;
56  *pwc = ((ucs4_t) (c & 0x03) << 24)
57  | ((ucs4_t) (s[1] ^ 0x80) << 18)
58  | ((ucs4_t) (s[2] ^ 0x80) << 12)
59  | ((ucs4_t) (s[3] ^ 0x80) << 6)
60  | (ucs4_t) (s[4] ^ 0x80);
61  return 5;
62  } else if (c < 0xfe) {
63  if (n < 6)
64  return RET_TOOFEW(0);
65  if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
66  && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
67  && (s[5] ^ 0x80) < 0x40
68  && (c >= 0xfd || s[1] >= 0x84)))
69  return RET_ILSEQ;
70  *pwc = ((ucs4_t) (c & 0x01) << 30)
71  | ((ucs4_t) (s[1] ^ 0x80) << 24)
72  | ((ucs4_t) (s[2] ^ 0x80) << 18)
73  | ((ucs4_t) (s[3] ^ 0x80) << 12)
74  | ((ucs4_t) (s[4] ^ 0x80) << 6)
75  | (ucs4_t) (s[5] ^ 0x80);
76  return 6;
77  } else
78  return RET_ILSEQ;
79 }
80 
81 static int
82 utf8_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n) /* n == 0 is acceptable */
83 {
84  int count;
85  if (wc < 0x80)
86  count = 1;
87  else if (wc < 0x800)
88  count = 2;
89  else if (wc < 0x10000)
90  count = 3;
91  else if (wc < 0x200000)
92  count = 4;
93  else if (wc < 0x4000000)
94  count = 5;
95  else if (wc <= 0x7fffffff)
96  count = 6;
97  else
98  return RET_ILSEQ;
99  if (n < count)
100  return RET_TOOSMALL;
101  switch (count) { /* note: code falls through cases! */
102  case 6: r[5] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x4000000;
103  case 5: r[4] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x200000;
104  case 4: r[3] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x10000;
105  case 3: r[2] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x800;
106  case 2: r[1] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0xc0;
107  case 1: r[0] = wc;
108  }
109  return count;
110 }