From c8b94b8780019dca7511efe1cf110f1b19bf7a7d Mon Sep 17 00:00:00 2001 From: Matan Ziv-Av Date: Tue, 2 Apr 2024 19:30:28 +0000 Subject: [PATCH] Override width of YiJing Hexagram Symbols Unicode characters (0x4dc0-0x4dff) Their Unicode width is 1, but GNU libc's wcwidth and libreadline override it to 2, so it makes sense to follow suit. The profile option "ignore wcwidth" returns the width to 1, if enabled, so it allows the old behavior if it is desired. BUG: 421625 --- src/Screen.cpp | 4 ++-- src/characters/Character.h | 21 +++++++++++++-------- src/characters/CharacterWidth.cpp | 22 +++++++++++----------- tools/uni2characterwidth/overrides.txt | 1 + 4 files changed, 27 insertions(+), 21 deletions(-) diff --git a/src/Screen.cpp b/src/Screen.cpp index d60673975..74b870b8a 100644 --- a/src/Screen.cpp +++ b/src/Screen.cpp @@ -1096,7 +1096,7 @@ void Screen::displayCharacter(uint c) // We indicate the fact that a newline has to be triggered by // putting the cursor one right to the last column of the screen. 
- int w = Character::width(c); + int w = Character::width(c, _ignoreWcWidth); const QChar::Category category = QChar::category(c); if (w < 0) { // Non-printable character @@ -2189,7 +2189,7 @@ int Screen::copyLineToStream(int line, } // If the last character is wide, account for it - if (Character::width(characterBuffer[count - 1].character) == 2) + if (Character::width(characterBuffer[count - 1].character, _ignoreWcWidth) == 2) count++; if (appendNewLine) { diff --git a/src/characters/Character.h b/src/characters/Character.h index e65d585e1..00a987985 100644 --- a/src/characters/Character.h +++ b/src/characters/Character.h @@ -226,9 +226,9 @@ public: } } - int width() const + int width(bool ignoreWcWidth = false) const { - return width(character); + return width(character, ignoreWcWidth); } int repl() const @@ -309,7 +309,7 @@ public: return false; } - static int width(uint ucs4) + static int width(uint ucs4, bool ignoreWcWidth = false) { // ASCII if (ucs4 >= 0x20 && ucs4 < 0x7f) @@ -326,10 +326,15 @@ public: if ((ucs4 > 0x0 && ucs4 < 0x20) || (ucs4 >= 0x7F && ucs4 < 0xA0)) return -1; + if (ignoreWcWidth && 0x04DC0 <= ucs4 && ucs4 <= 0x04DFF) { + // Yijing Hexagram Symbols have wcwidth 2, but unicode width 1 + return 1; + } + return characterWidth(ucs4); } - static int stringWidth(const char32_t *ucs4Str, int len) + static int stringWidth(const char32_t *ucs4Str, int len, bool ignoreWcWidth = false) { int w = 0; Hangul::SyllablePos hangulSyllablePos = Hangul::NotInSyllable; @@ -338,19 +343,19 @@ public: const uint c = ucs4Str[i]; if (!Hangul::isHangul(c)) { - w += width(c); + w += width(c, ignoreWcWidth); hangulSyllablePos = Hangul::NotInSyllable; } else { - w += Hangul::width(c, width(c), hangulSyllablePos); + w += Hangul::width(c, width(c, ignoreWcWidth), hangulSyllablePos); } } return w; } - inline static int stringWidth(const QString &str) + inline static int stringWidth(const QString &str, bool ignoreWcWidth = false) { const auto ucs4Str = 
str.toStdU32String(); - return stringWidth(ucs4Str.data(), ucs4Str.size()); + return stringWidth(ucs4Str.data(), ucs4Str.size(), ignoreWcWidth); } inline uint baseCodePoint() const diff --git a/src/characters/CharacterWidth.cpp b/src/characters/CharacterWidth.cpp index 9f8c48a97..36f27fd6e 100644 --- a/src/characters/CharacterWidth.cpp +++ b/src/characters/CharacterWidth.cpp @@ -60,16 +60,16 @@ static constexpr const Range LUT_2[] = { {0x002728,0x002728},{0x00274c,0x00274c},{0x00274e,0x00274e},{0x002753,0x002755},{0x002757,0x002757},{0x002795,0x002797},{0x0027b0,0x0027b0},{0x0027bf,0x0027bf}, {0x002b1b,0x002b1c},{0x002b50,0x002b50},{0x002b55,0x002b55},{0x002e80,0x002e99},{0x002e9b,0x002ef3},{0x002f00,0x002fd5},{0x002ff0,0x002ffb},{0x003000,0x003029}, {0x00302e,0x00303e},{0x003041,0x003096},{0x00309b,0x0030ff},{0x003105,0x00312f},{0x003131,0x00318e},{0x003190,0x0031e3},{0x0031f0,0x00321e},{0x003220,0x003247}, - {0x003250,0x004dbf},{0x004e00,0x00a48c},{0x00a490,0x00a4c6},{0x00a960,0x00a97c},{0x00ac00,0x00d7a3},{0x00f900,0x00faff},{0x00fe10,0x00fe19},{0x00fe30,0x00fe52}, - {0x00fe54,0x00fe66},{0x00fe68,0x00fe6b},{0x00ff01,0x00ff60},{0x00ffe0,0x00ffe6},{0x016fe0,0x016fe3},{0x016ff0,0x016ff1},{0x017000,0x0187f7},{0x018800,0x018cd5}, - {0x018d00,0x018d08},{0x01aff0,0x01aff3},{0x01aff5,0x01affb},{0x01affd,0x01affe},{0x01b000,0x01b122},{0x01b132,0x01b132},{0x01b150,0x01b152},{0x01b155,0x01b155}, - {0x01b164,0x01b167},{0x01b170,0x01b2fb},{0x01f004,0x01f004},{0x01f0cf,0x01f0cf},{0x01f18e,0x01f18e},{0x01f191,0x01f19a},{0x01f1e6,0x01f202},{0x01f210,0x01f23b}, - {0x01f240,0x01f248},{0x01f250,0x01f251},{0x01f260,0x01f265},{0x01f300,0x01f320},{0x01f32d,0x01f335},{0x01f337,0x01f37c},{0x01f37e,0x01f393},{0x01f3a0,0x01f3ca}, - {0x01f3cf,0x01f3d3},{0x01f3e0,0x01f3f0},{0x01f3f4,0x01f3f4},{0x01f3f8,0x01f43e},{0x01f440,0x01f440},{0x01f442,0x01f4fc},{0x01f4ff,0x01f53d},{0x01f54b,0x01f54e}, - 
{0x01f550,0x01f567},{0x01f57a,0x01f57a},{0x01f595,0x01f596},{0x01f5a4,0x01f5a4},{0x01f5fb,0x01f64f},{0x01f680,0x01f6c5},{0x01f6cc,0x01f6cc},{0x01f6d0,0x01f6d2}, - {0x01f6d5,0x01f6d7},{0x01f6dc,0x01f6df},{0x01f6eb,0x01f6ec},{0x01f6f4,0x01f6fc},{0x01f7e0,0x01f7eb},{0x01f7f0,0x01f7f0},{0x01f90c,0x01f93a},{0x01f93c,0x01f945}, - {0x01f947,0x01f9ff},{0x01fa70,0x01fa7c},{0x01fa80,0x01fa88},{0x01fa90,0x01fabd},{0x01fabf,0x01fac5},{0x01face,0x01fadb},{0x01fae0,0x01fae8},{0x01faf0,0x01faf8}, - {0x020000,0x02fffd},{0x030000,0x03fffd}, + {0x003250,0x00a48c},{0x00a490,0x00a4c6},{0x00a960,0x00a97c},{0x00ac00,0x00d7a3},{0x00f900,0x00faff},{0x00fe10,0x00fe19},{0x00fe30,0x00fe52},{0x00fe54,0x00fe66}, + {0x00fe68,0x00fe6b},{0x00ff01,0x00ff60},{0x00ffe0,0x00ffe6},{0x016fe0,0x016fe3},{0x016ff0,0x016ff1},{0x017000,0x0187f7},{0x018800,0x018cd5},{0x018d00,0x018d08}, + {0x01aff0,0x01aff3},{0x01aff5,0x01affb},{0x01affd,0x01affe},{0x01b000,0x01b122},{0x01b132,0x01b132},{0x01b150,0x01b152},{0x01b155,0x01b155},{0x01b164,0x01b167}, + {0x01b170,0x01b2fb},{0x01f004,0x01f004},{0x01f0cf,0x01f0cf},{0x01f18e,0x01f18e},{0x01f191,0x01f19a},{0x01f1e6,0x01f202},{0x01f210,0x01f23b},{0x01f240,0x01f248}, + {0x01f250,0x01f251},{0x01f260,0x01f265},{0x01f300,0x01f320},{0x01f32d,0x01f335},{0x01f337,0x01f37c},{0x01f37e,0x01f393},{0x01f3a0,0x01f3ca},{0x01f3cf,0x01f3d3}, + {0x01f3e0,0x01f3f0},{0x01f3f4,0x01f3f4},{0x01f3f8,0x01f43e},{0x01f440,0x01f440},{0x01f442,0x01f4fc},{0x01f4ff,0x01f53d},{0x01f54b,0x01f54e},{0x01f550,0x01f567}, + {0x01f57a,0x01f57a},{0x01f595,0x01f596},{0x01f5a4,0x01f5a4},{0x01f5fb,0x01f64f},{0x01f680,0x01f6c5},{0x01f6cc,0x01f6cc},{0x01f6d0,0x01f6d2},{0x01f6d5,0x01f6d7}, + {0x01f6dc,0x01f6df},{0x01f6eb,0x01f6ec},{0x01f6f4,0x01f6fc},{0x01f7e0,0x01f7eb},{0x01f7f0,0x01f7f0},{0x01f90c,0x01f93a},{0x01f93c,0x01f945},{0x01f947,0x01f9ff}, + {0x01fa70,0x01fa7c},{0x01fa80,0x01fa88},{0x01fa90,0x01fabd},{0x01fabf,0x01fac5},{0x01face,0x01fadb},{0x01fae0,0x01fae8},{0x01faf0,0x01faf8},{0x020000,0x02fffd}, + 
{0x030000,0x03fffd}, }; static constexpr const Range LUT_0[] = { @@ -123,7 +123,7 @@ static constexpr const Range LUT_0[] = { static constexpr const RangeLut RANGE_LUT_LIST[] = { {-1, LUT_NONPRINTABLE, 1}, - { 2, LUT_2 , 122}, + { 2, LUT_2 , 121}, { 0, LUT_0 , 359}, { 1, nullptr , 1}, }; diff --git a/tools/uni2characterwidth/overrides.txt b/tools/uni2characterwidth/overrides.txt index c8db86f13..294c7f1d3 100644 --- a/tools/uni2characterwidth/overrides.txt +++ b/tools/uni2characterwidth/overrides.txt @@ -1,4 +1,5 @@ 000AD ; 1 # (­) Soft Hyphen (originally 0) +04DC0..04DFF ; 2 # YIJING hexagrams 01160..011FF ; 0 # (ᅟᅠ..ᅟᅠᇿ) Hangul Jamo Jungseong and Jongseong (originally 1) 0D7B0..0D7C6 ; 0 # (ᅟힰ..ᅟퟆ) Hangul Jamo Jungseong (complex vowels) for Old Korean (originally 1) 0D7CB..0D7FB ; 0 # (ᅟᅠퟋ..ᅟᅠퟻ) Hangul Jamo Jongseong (trailing consonants) for Old Korean (originally 1)