FLTK 1.4.0
fl_utf8.h
Go to the documentation of this file.
1 /*
2  * Author: Jean-Marc Lienher ( http://oksid.ch )
3  * Copyright 2000-2010 by O'ksi'D.
4  * Copyright 2016-2021 by Bill Spitzak and others.
5  *
6  * This library is free software. Distribution and use rights are outlined in
7  * the file "COPYING" which should have been included with this file. If this
8  * file is missing or damaged, see the license at:
9  *
10  * https://www.fltk.org/COPYING.php
11  *
12  * Please see the following page on how to report bugs and issues:
13  *
14  * https://www.fltk.org/bugs.php
15  */
16 
17 /* Merged in some functionality from the fltk-2 version. IMM.
18  * The following code is an attempt to merge the functions incorporated in FLTK2
19  * with the functions provided in OksiD's fltk-1.1.6-utf8 port
20  */
21 
27 #ifndef _HAVE_FL_UTF8_HDR_
28 #define _HAVE_FL_UTF8_HDR_
29 
30 #include "Fl_Export.H"
31 #include "fl_types.h"
32 #include <stdio.h> // FILE *fl_fopen()
33 #include <sys/stat.h> // struct stat
34 
35 #ifdef __cplusplus
36 extern "C" {
37 #endif
38 
43 /* F2: comes from FLTK2 */
44 /* OD: comes from OksiD */
45 
51 FL_EXPORT int fl_utf8bytes(unsigned ucs);
52 
53 /* OD: returns the byte length of the first UTF-8 char sequence (returns -1 if not valid) */
54 FL_EXPORT int fl_utf8len(char c);
55 
56 /* OD: returns the byte length of the first UTF-8 char sequence (returns +1 if not valid) */
57 FL_EXPORT int fl_utf8len1(char c);
58 
59 /* OD: returns the number of Unicode chars in the UTF-8 string */
60 FL_EXPORT int fl_utf_nb_char(const unsigned char *buf, int len);
61 
62 /* F2: Convert the next UTF-8 char-sequence into a Unicode value (and say how many bytes were used) */
63 FL_EXPORT unsigned fl_utf8decode(const char* p, const char* end, int* len);
64 
65 /* F2: Encode a Unicode value into a UTF-8 sequence, return the number of bytes used */
66 FL_EXPORT int fl_utf8encode(unsigned ucs, char* buf);
67 
68 /* F2: Move forward to the next valid UTF-8 sequence start between start and end */
69 FL_EXPORT const char* fl_utf8fwd(const char* p, const char* start, const char* end);
70 
71 /* F2: Move backward to the previous valid UTF-8 sequence start */
72 FL_EXPORT const char* fl_utf8back(const char* p, const char* start, const char* end);
73 
74 /* XX: Convert a single 32-bit Unicode value into UTF16 */
75 FL_EXPORT unsigned fl_ucs_to_Utf16(const unsigned ucs, unsigned short *dst, const unsigned dstlen);
76 
77 /* F2: Convert a UTF-8 string into UTF16 */
78 FL_EXPORT unsigned fl_utf8toUtf16(const char* src, unsigned srclen, unsigned short* dst, unsigned dstlen);
79 
80 /* F2: Convert a UTF-8 string into a wide character string - makes UTF16 on win32, "UCS4" elsewhere */
81 FL_EXPORT unsigned fl_utf8towc(const char *src, unsigned srclen, wchar_t *dst, unsigned dstlen);
82 
83 /* F2: Convert a wide character string to UTF-8 - takes in UTF16 on win32, "UCS4" elsewhere */
84 FL_EXPORT unsigned fl_utf8fromwc(char *dst, unsigned dstlen, const wchar_t *src, unsigned srclen);
85 
86 /* F2: Convert a UTF-8 string into ASCII, eliding untranslatable glyphs */
87 FL_EXPORT unsigned fl_utf8toa (const char *src, unsigned srclen, char *dst, unsigned dstlen);
88 
89 /* F2: Convert 8859-1 string to UTF-8 */
90 FL_EXPORT unsigned fl_utf8froma (char *dst, unsigned dstlen, const char *src, unsigned srclen);
91 
92 /* F2: Returns true if the current O/S locale is UTF-8 */
93 FL_EXPORT int fl_utf8locale(void);
94 
95 /* F2: Examine the first len bytes of src, to determine if the input text is UTF-8 or not
96  * NOTE: The value returned is not simply boolean - it contains information about the probable
97  * type of the src text. */
98 FL_EXPORT int fl_utf8test(const char *src, unsigned len);
99 
100 /* XX: return width of "raw" ucs character in columns.
101  * for internal use only */
102 FL_EXPORT int fl_wcwidth_(unsigned int ucs);
103 
104 /* XX: return width of UTF-8 character string in columns.
105  * NOTE: this may also do C1 control character (0x80 to 0x9f) to CP1252 mapping,
106  * depending on original build options */
107 FL_EXPORT int fl_wcwidth(const char *src);
108 
109 /* OD: Return true if the character is non-spacing */
110 FL_EXPORT unsigned int fl_nonspacing(unsigned int ucs);
111 
112 /* F2: Convert UTF-8 to a local multi-byte encoding - mainly for win32? */
113 FL_EXPORT unsigned fl_utf8to_mb(const char *src, unsigned srclen, char *dst, unsigned dstlen);
114 /* OD: Convert UTF-8 to a local multi-byte encoding */
115 FL_EXPORT char* fl_utf2mbcs(const char *src);
116 
117 /* F2: Convert a local multi-byte encoding to UTF-8 - mainly for win32? */
118 FL_EXPORT unsigned fl_utf8from_mb(char *dst, unsigned dstlen, const char *src, unsigned srclen);
119 
120 /*****************************************************************************/
121 #ifdef _WIN32
122 /* these two Windows-only functions are kept for API compatibility */
123 /* OD: Attempt to convert the UTF-8 string to the current locale */
124 FL_EXPORT char *fl_utf8_to_locale(const char *s, int len, unsigned int codepage);
125 
126 /* OD: Attempt to convert a string in the current locale to UTF-8 */
127 FL_EXPORT char *fl_locale_to_utf8(const char *s, int len, unsigned int codepage);
128 #endif /* _WIN32 */
129 
130 /*****************************************************************************
131  * The following functions are intended to provide portable, UTF-8 aware
132  * versions of standard functions
133  */
134 
135 /* OD: UTF-8 aware strncasecmp - converts to lower case Unicode and tests */
136 FL_EXPORT int fl_utf_strncasecmp(const char *s1, const char *s2, int n);
137 
138 /* OD: UTF-8 aware strcasecmp - converts to Unicode and tests */
139 FL_EXPORT int fl_utf_strcasecmp(const char *s1, const char *s2);
140 
141 /* OD: return the Unicode lower case value of ucs */
142 FL_EXPORT int fl_tolower(unsigned int ucs);
143 
144 /* OD: return the Unicode upper case value of ucs */
145 FL_EXPORT int fl_toupper(unsigned int ucs);
146 
147 /* OD: converts the UTF-8 string to the lower case equivalent */
148 FL_EXPORT int fl_utf_tolower(const unsigned char *str, int len, char *buf);
149 
150 /* OD: converts the UTF-8 string to the upper case equivalent */
151 FL_EXPORT int fl_utf_toupper(const unsigned char *str, int len, char *buf);
152 
153 /* OD: Portable UTF-8 aware chmod wrapper */
154 FL_EXPORT int fl_chmod(const char* f, int mode);
155 
156 /* OD: Portable UTF-8 aware access wrapper */
157 FL_EXPORT int fl_access(const char* f, int mode);
158 
159 /* OD: Portable UTF-8 aware stat wrapper */
160 FL_EXPORT int fl_stat(const char *path, struct stat *buffer);
161 
162 /* OD: Portable UTF-8 aware getcwd wrapper */
163 FL_EXPORT char *fl_getcwd(char *buf, int len);
164 
165 /* Portable UTF-8 aware chdir wrapper */
166 FL_EXPORT int fl_chdir(const char *path);
167 
168 /* OD: Portable UTF-8 aware fopen wrapper */
169 FL_EXPORT FILE *fl_fopen(const char *f, const char *mode);
170 
171 /* OD: Portable UTF-8 aware system wrapper */
172 FL_EXPORT int fl_system(const char* f);
173 
174 /* OD: Portable UTF-8 aware execvp wrapper */
175 FL_EXPORT int fl_execvp(const char *file, char *const *argv);
176 
177 /* OD: Portable UTF-8 aware open wrapper */
178 FL_EXPORT int fl_open(const char *fname, int oflags, ...);
179 
180 FL_EXPORT int fl_open_ext(const char *fname, int binary, int oflags, ...);
181 
182 /* Portable wrapper around unix-style close() function */
183 FL_EXPORT int fl_close_fd(int fd);
184 
185 /* OD: Portable UTF-8 aware unlink wrapper */
186 FL_EXPORT int fl_unlink(const char *fname);
187 
188 /* OD: Portable UTF-8 aware rmdir wrapper */
189 FL_EXPORT int fl_rmdir(const char *f);
190 
191 /* OD: Portable UTF-8 aware getenv wrapper */
192 FL_EXPORT char* fl_getenv(const char *name);
193 
194 /* Portable UTF-8 aware putenv wrapper */
195 FL_EXPORT int fl_putenv(const char *var);
196 
197 /* OD: Portable UTF-8 aware mkdir wrapper */
198 FL_EXPORT int fl_mkdir(const char* f, int mode);
199 
200 /* OD: Portable UTF-8 aware rename wrapper */
201 FL_EXPORT int fl_rename(const char* f, const char *t);
202 
203 
204 /* OD: Given a full pathname, this will create the directory path needed to hold the file named */
205 FL_EXPORT void fl_make_path_for_file( const char *path );
206 
207 /* OD: recursively create a path in the file system */
208 FL_EXPORT char fl_make_path( const char *path );
209 
210 
213 /*****************************************************************************/
214 
215 #ifdef __cplusplus
216 }
217 #endif /* __cplusplus */
218 
219 
220 #endif /* _HAVE_FL_UTF8_HDR_ */
This file contains simple "C"-style type definitions.
FL_EXPORT char * fl_getenv(const char *name)
Cross-platform function to get environment variables with a UTF-8 encoded name or value.
Definition: fl_utf8.cxx:311
FL_EXPORT int fl_open(const char *fname, int oflags,...)
Cross-platform function to open files with a UTF-8 encoded name.
Definition: fl_utf8.cxx:359
FL_EXPORT unsigned fl_utf8toa(const char *src, unsigned srclen, char *dst, unsigned dstlen)
Convert a UTF-8 sequence into an array of 1-byte characters.
Definition: fl_utf8.cxx:1056
FL_EXPORT int fl_chmod(const char *f, int mode)
Cross-platform function to set a file's mode() with a UTF-8 encoded name or value.
Definition: fl_utf8.cxx:445
FL_EXPORT int fl_utf_strncasecmp(const char *s1, const char *s2, int n)
UTF-8 aware strncasecmp - converts to lower case Unicode and tests.
Definition: fl_utf8.cxx:151
FL_EXPORT int fl_utf8bytes(unsigned ucs)
Return the number of bytes needed to encode the given UCS4 character in UTF-8.
Definition: fl_utf8.cxx:845
FL_EXPORT int fl_utf8len(char c)
Returns the byte length of the UTF-8 sequence with first byte c, or -1 if c is not valid.
Definition: fl_utf8.cxx:69
FL_EXPORT int fl_wcwidth(const char *src)
Extended wrapper around the fl_wcwidth_(unsigned int ucs) function.
Definition: fl_utf8.cxx:1217
FL_EXPORT int fl_utf_toupper(const unsigned char *str, int len, char *buf)
Converts the string str to its upper case equivalent into buf.
Definition: fl_utf8.cxx:233
FL_EXPORT unsigned fl_utf8fromwc(char *dst, unsigned dstlen, const wchar_t *src, unsigned srclen)
Turn "wide characters" as returned by some system calls (especially on Windows) into UTF-8.
Definition: fl_utf8.cxx:1289
FL_EXPORT int fl_chdir(const char *path)
Cross-platform function to change the current working directory, given as a UTF-8 encoded string.
Definition: fl_utf8.cxx:508
FL_EXPORT int fl_system(const char *f)
Cross-platform function to run a system command with a UTF-8 encoded string.
Definition: fl_utf8.cxx:425
FL_EXPORT int fl_stat(const char *path, struct stat *buffer)
Cross-platform function to stat() a file using a UTF-8 encoded name or value.
Definition: fl_utf8.cxx:479
FL_EXPORT unsigned fl_utf8toUtf16(const char *src, unsigned srclen, unsigned short *dst, unsigned dstlen)
Convert a UTF-8 sequence into an array of 16-bit characters.
Definition: fl_utf8.cxx:998
FL_EXPORT int fl_close_fd(int fd)
Cross-platform function to close a file descriptor.
Definition: fl_utf8.cxx:372
FL_EXPORT int fl_rename(const char *f, const char *t)
Cross-platform function to rename a filesystem object using UTF-8 encoded names.
Definition: fl_utf8.cxx:584
FL_EXPORT unsigned fl_utf8to_mb(const char *src, unsigned srclen, char *dst, unsigned dstlen)
Convert the UTF-8 used by FLTK to the locale-specific encoding used for filenames (and sometimes used...
Definition: fl_utf8.cxx:1328
FL_EXPORT int fl_tolower(unsigned int ucs)
Returns the Unicode lower case value of ucs.
Definition: fl_utf8.cxx:187
FL_EXPORT int fl_putenv(const char *var)
Cross-platform function to write environment variables with a UTF-8 encoded name or value.
Definition: fl_utf8.cxx:343
FL_EXPORT unsigned int fl_nonspacing(unsigned int ucs)
Returns true if the Unicode character ucs is non-spacing.
Definition: fl_utf8.cxx:271
FL_EXPORT int fl_mkdir(const char *f, int mode)
Cross-platform function to create a directory with a UTF-8 encoded name.
Definition: fl_utf8.cxx:557
FL_EXPORT int fl_access(const char *f, int mode)
Cross-platform function to test a file's access() with a UTF-8 encoded name or value.
Definition: fl_utf8.cxx:465
FL_EXPORT int fl_wcwidth_(unsigned int ucs)
Wrapper to adapt Markus Kuhn's implementation of wcwidth() for FLTK.
Definition: fl_utf8.cxx:1200
FL_EXPORT const char * fl_utf8fwd(const char *p, const char *start, const char *end)
Move p forward until it points to the start of a UTF-8 character.
Definition: fl_utf8.cxx:794
FL_EXPORT char * fl_getcwd(char *buf, int len)
Cross-platform function to get the current working directory as a UTF-8 encoded value.
Definition: fl_utf8.cxx:528
FL_EXPORT FILE * fl_fopen(const char *f, const char *mode)
Cross-platform function to open files with a UTF-8 encoded name.
Definition: fl_utf8.cxx:410
FL_EXPORT char * fl_utf2mbcs(const char *src)
Converts UTF-8 string src to a local multi-byte character string.
Definition: fl_utf8.cxx:280
FL_EXPORT char fl_make_path(const char *path)
Cross-platform function to recursively create a path in the file system.
Definition: fl_utf8.cxx:596
FL_EXPORT int fl_unlink(const char *fname)
Cross-platform function to unlink() (that is, delete) a file using a UTF-8 encoded filename.
Definition: fl_utf8.cxx:544
FL_EXPORT int fl_open_ext(const char *fname, int binary, int oflags,...)
Cross-platform function to open files with a UTF-8 encoded name.
Definition: fl_utf8.cxx:391
FL_EXPORT int fl_toupper(unsigned int ucs)
Returns the Unicode upper case value of ucs.
Definition: fl_utf8.cxx:195
FL_EXPORT unsigned fl_utf8towc(const char *src, unsigned srclen, wchar_t *dst, unsigned dstlen)
Converts a UTF-8 string into a wide character string.
Definition: fl_utf8.cxx:1255
FL_EXPORT int fl_utf_tolower(const unsigned char *str, int len, char *buf)
Converts the string str to its lower case equivalent into buf.
Definition: fl_utf8.cxx:204
FL_EXPORT int fl_utf8locale(void)
Return true if the "locale" seems to indicate that UTF-8 encoding is used.
Definition: fl_utf8.cxx:1307
FL_EXPORT int fl_rmdir(const char *f)
Cross-platform function to remove a directory with a UTF-8 encoded name.
Definition: fl_utf8.cxx:570
FL_EXPORT const char * fl_utf8back(const char *p, const char *start, const char *end)
Move p backward until it points to the start of a UTF-8 character.
Definition: fl_utf8.cxx:825
FL_EXPORT int fl_utf_strcasecmp(const char *s1, const char *s2)
UTF-8 aware strcasecmp - converts to Unicode and tests.
Definition: fl_utf8.cxx:179
FL_EXPORT unsigned fl_utf8decode(const char *p, const char *end, int *len)
Decode a single UTF-8 encoded character starting at p.
Definition: fl_utf8.cxx:702
FL_EXPORT unsigned fl_ucs_to_Utf16(const unsigned ucs, unsigned short *dst, const unsigned dstlen)
Convert a single 32-bit Unicode codepoint into an array of 16-bit characters.
Definition: fl_utf8.cxx:929
FL_EXPORT unsigned fl_utf8from_mb(char *dst, unsigned dstlen, const char *src, unsigned srclen)
Convert a filename from the locale-specific multibyte encoding used by Windows to UTF-8 as used by FL...
Definition: fl_utf8.cxx:1360
FL_EXPORT int fl_utf8encode(unsigned ucs, char *buf)
Write the UTF-8 encoding of ucs into buf and return the number of bytes written.
Definition: fl_utf8.cxx:875
FL_EXPORT unsigned fl_utf8froma(char *dst, unsigned dstlen, const char *src, unsigned srclen)
Convert an ISO-8859-1 (i.e. normal C string) byte stream to UTF-8.
Definition: fl_utf8.cxx:1111
FL_EXPORT void fl_make_path_for_file(const char *path)
Cross-platform function to create a path for the file in the file system.
Definition: fl_utf8.cxx:616
FL_EXPORT int fl_utf8len1(char c)
Returns the byte length of the UTF-8 sequence with first byte c, or 1 if c is not valid.
Definition: fl_utf8.cxx:99
FL_EXPORT int fl_utf8test(const char *src, unsigned len)
Examines the first len bytes in src and returns a verdict on whether it is UTF-8 or not.
Definition: fl_utf8.cxx:1162
FL_EXPORT int fl_utf_nb_char(const unsigned char *buf, int len)
Returns the number of Unicode chars in the UTF-8 string.
Definition: fl_utf8.cxx:125