|
|
commit fba4d58509e337c3f868c7ae68b3566f85ba86f5
Author: Albrecht Schlosser <albrechts.fltk@online.de>
AuthorDate: Fri Jan 8 14:56:31 2021 +0100
Commit: Albrecht Schlosser <albrechts.fltk@online.de>
CommitDate: Fri Jan 8 14:57:34 2021 +0100
Fix Fl_Help_View::find() (issue #179)
Fix search (string comparison) which had a few different issues.
Document the function, arguments, and details about string matching.
To do: correctly match complex HTML entities like "&euro;" with
Unicode code points >= U+0080 (UTF-8 multi byte encoding).
src/Fl_Help_View.cxx | 47 +++++++++++++++++++++++++++++++++--------------
1 file changed, 33 insertions(+), 14 deletions(-)
diff --git src/Fl_Help_View.cxx src/Fl_Help_View.cxx
index e27f81e..5041d6f 100644
--- src/Fl_Help_View.cxx
+++ src/Fl_Help_View.cxx
@@ -1177,10 +1177,27 @@ Fl_Help_View::draw()
} // draw()
-
/** Finds the specified string \p s at starting position \p p.
- \return the matching position or -1 if not found
+ The argument \p p and the return value are offsets in Fl_Help_View::value(),
+ counting from 0. If \p p is out of range, 0 is used.
+
+ The string comparison is simple but honors some special cases:
+ - the specified string \p s must be in UTF-8 encoding
+ - HTML tags in value() are filtered (not compared as such, they never match)
+ - HTML entities like '\&lt;' or '\&#x20ac;' are converted to Unicode (UTF-8)
+ - ASCII characters (7-bit, \< 0x80) are compared case insensitive
+ - every newline (LF, '\\n') in value() is treated like a single space
+ - all other strings are compared as-is (byte by byte)
+
+ \todo complex HTML entities for Unicode code points \> 0x80 are currently treated
+ like one byte (not character!) and do not (yet) match correctly ("&lt;" matches "<"
+ but "&euro;" doesn't match "€", and "&uuml;" doesn't match "ü")
+
+ \param[in] s search string in UTF-8 encoding
+ \param[in] p starting position for search (0,...), Default = 0
+
+ \return the matching position or -1 if not found
*/
int // O - Matching position or -1 if not found
Fl_Help_View::find(const char *s, // I - String to find
@@ -1193,27 +1210,28 @@ Fl_Help_View::find(const char *s, // I - String to find
*bs, // Start of current comparison
*sp; // Search string pointer
-
DEBUG_FUNCTION(__LINE__,__FUNCTION__);
// Range check input and value...
if (!s || !value_) return -1;
if (p < 0 || p >= (int)strlen(value_)) p = 0;
- else if (p > 0) p ++;
// Look for the string...
- for (i = nblocks_, b = blocks_; i > 0; i --, b ++) {
+ for (i = nblocks_, b = blocks_; i > 0; i--, b++) {
if (b->end < (value_ + p))
continue;
if (b->start < (value_ + p)) bp = value_ + p;
else bp = b->start;
- for (sp = s, bs = bp; *sp && *bp && bp < b->end; bp ++) {
+ for (sp = s, bs = bp; *sp && *bp && bp < b->end; bp++) {
if (*bp == '<') {
// skip to end of element...
- while (*bp && bp < b->end && *bp != '>') bp ++;
+ while (*bp && bp < b->end && *bp != '>') bp++;
+ // no match, so reset to start of search...
+ sp = s;
+ bs = bp + 1;
continue;
} else if (*bp == '&') {
// decode HTML entity...
@@ -1221,6 +1239,8 @@ Fl_Help_View::find(const char *s, // I - String to find
else bp = strchr(bp + 1, ';') + 1;
} else c = *bp;
+ if (c == '\n') c = ' '; // treat newline as a single space
+
// *FIXME* *UTF-8* (A.S. 02/14/2016)
// At this point c may be an arbitrary Unicode Code Point corresponding
// to a quoted character (see above), i.e. it _can_ be a multi byte
@@ -1229,19 +1249,18 @@ Fl_Help_View::find(const char *s, // I - String to find
// For instance: "&euro;" == 0x20ac -> 0xe2 0x82 0xac (UTF-8: 3 bytes).
// Hint: use fl_utf8encode() [see below]
- if (tolower(*sp) == tolower(c)) sp ++;
- else {
- // No match, so reset to start of search...
+ if (c > 0x20 && c < 0x80 && tolower(*sp) == tolower(c)) sp++;
+ else if (*sp == c) sp++;
+ else { // No match, so reset to start of search...
sp = s;
- bs ++;
bp = bs;
+ bs++;
}
}
- if (!*sp) {
- // Found a match!
+ if (!*sp) { // Found a match!
topline(b->y - b->h);
- return (int) (b->end - value_);
+ return int(bs - value_);
}
}
[ Direct Link to Message ] | |
|
| |