From 570a5fab880f5952119ae3943393548c6571f8ec Mon Sep 17 00:00:00 2001 From: Slendi Date: Mon, 4 Aug 2025 02:28:47 +0300 Subject: [PATCH] Add support for emoji identifiers Signed-off-by: Slendi --- samples/utf8.dcfg | 2 +- src/dcfg.c | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/samples/utf8.dcfg b/samples/utf8.dcfg index 945375b..97a69aa 100644 --- a/samples/utf8.dcfg +++ b/samples/utf8.dcfg @@ -2,7 +2,7 @@ fn lib = { english = "The quick brown fox jumps over the lazy dog." 日本語 = "はじめまして!" Română = "Salutare! ĂÂÎȘȚăâîșț" - emoji = "👋🌍😊🚀🤖" + 😀 = "👋🌍😊🚀🤖" diacritics = "ŠĐĆŽšđž" français = "Ça déjà vu?" español = "¡El pingüino Wenceslao bebe whisky y zumo de piña!" diff --git a/src/dcfg.c b/src/dcfg.c index 321baf7..6859826 100644 --- a/src/dcfg.c +++ b/src/dcfg.c @@ -569,9 +569,10 @@ static inline bool is_space_cp(int32_t cp) } static inline bool is_alpha_cp(int32_t cp) { + utf8proc_category_t cat = utf8proc_category(cp); return (cp <= 0x7F && isalpha(cp)) - || (utf8proc_category(cp) >= UTF8PROC_CATEGORY_LU - && utf8proc_category(cp) <= UTF8PROC_CATEGORY_LO); + || (cat >= UTF8PROC_CATEGORY_LU && cat <= UTF8PROC_CATEGORY_LO) + || (cat == UTF8PROC_CATEGORY_SO); } static inline bool is_digit_cp(int32_t cp) {