From 8facc4d3ef6ffed1e4006a7549a3c03a1a5b7d83 Mon Sep 17 00:00:00 2001 From: Lars Viklund Date: Fri, 19 Apr 2024 03:58:25 +0200 Subject: [PATCH] fix: enable Unicode separators and caret motions As the runtime is going to support Unicode installation locations and build directories, some UTF-8 text is going to reach the Lua side of the project. This includes the script path, the user path, any paths yielded from file searches and also imported character names from accounts. Care needs to be taken in many places where string operations are performed, as a byte no longer necessarily corresponds to a single character, and anything that truncates, reverses or otherwise slices strings may need an audit. This change fixes cursor movement in `EditControl`s with the arrow keys, as those historically used string matching and byte offsets. It also ensures that the use of arbitrary Unicode codepoints as decimal and thousands separators works correctly, as the previous code used Unicode-unaware reversing and slicing. 
--- src/Classes/EditControl.lua | 31 ++++++++----------------------- src/Modules/Common.lua | 12 +++++++----- 2 files changed, 15 insertions(+), 28 deletions(-) diff --git a/src/Classes/EditControl.lua b/src/Classes/EditControl.lua index 63eedfa1e..2964057ad 100644 --- a/src/Classes/EditControl.lua +++ b/src/Classes/EditControl.lua @@ -7,6 +7,7 @@ local m_max = math.max local m_min = math.min local m_floor = math.floor local protected_replace = "*" +local utf8 = require('lua-utf8') local function lastLine(str) local lastLineIndex = 1 @@ -541,18 +542,10 @@ function EditClass:OnKeyDown(key, doubleClick) if self.caret > 1 then if ctrl then -- Skip leading space, then jump word - while self.buf:sub(self.caret-1, self.caret-1):match("[%s%p]") do - if self.caret > 1 then - self.caret = self.caret - 1 - end - end - while self.buf:sub(self.caret-1, self.caret-1):match("%w") do - if self.caret > 1 then - self.caret = self.caret - 1 - end - end + self.caret = self.caret - #utf8.match(self.buf:sub(1, self.caret-1), "[%s%p]*$") + self.caret = self.caret - #utf8.match(self.buf:sub(1, self.caret-1), "%w*$") else - self.caret = self.caret - 1 + self.caret = utf8.next(self.buf, self.caret, -1) or 0 end self.lastUndoState.caret = self.caret self:ScrollCaretIntoView() @@ -562,19 +555,11 @@ function EditClass:OnKeyDown(key, doubleClick) self.sel = shift and (self.sel or self.caret) or nil if self.caret <= #self.buf then if ctrl then - -- Jump word, then skip trailing space, - while self.buf:sub(self.caret, self.caret):match("%w") do - if self.caret <= #self.buf then - self.caret = self.caret + 1 - end - end - while self.buf:sub(self.caret, self.caret):match("[%s%p]") do - if self.caret <= #self.buf then - self.caret = self.caret + 1 - end - end + -- Jump word, then skip trailing space, + self.caret = self.caret + #utf8.match(self.buf:sub(self.caret), "^%w*") + self.caret = self.caret + #utf8.match(self.buf:sub(self.caret), "^[%s%p]*") else - self.caret = self.caret + 1 + self.caret = 
utf8.next(self.buf, self.caret, 1) or #self.buf + 1 end self.lastUndoState.caret = self.caret self:ScrollCaretIntoView() diff --git a/src/Modules/Common.lua b/src/Modules/Common.lua index 3c99c20d2..91877ebc8 100644 --- a/src/Modules/Common.lua +++ b/src/Modules/Common.lua @@ -26,6 +26,7 @@ common.curl = require("lcurl.safe") common.xml = require("xml") common.base64 = require("base64") common.sha1 = require("sha1") +local utf8 = require('lua-utf8') -- Try to load a library return nil if failed. https://stackoverflow.com/questions/34965863/lua-require-fallback-error-handling function prerequire(...) @@ -723,20 +724,21 @@ function formatNumSep(str) end local x, y, minus, integer, fraction = str:find("(-?)(%d+)(%.?%d*)") if main.showThousandsSeparators then - integer = integer:reverse():gsub("(%d%d%d)", "%1"..main.thousandsSeparator):reverse() + rev1kSep = utf8.reverse(main.thousandsSeparator) + integer = utf8.reverse(utf8.gsub(utf8.reverse(integer), "(%d%d%d)", "%1"..rev1kSep)) -- There will be leading separators if the number of digits are divisible by 3 -- This checks for their presence and removes them -- Don't use patterns here because thousandsSeparator can be a pattern control character, and will crash if used if main.thousandsSeparator ~= "" then - local thousandsSeparator = string.find(integer, main.thousandsSeparator, 1, 2) + local thousandsSeparator = utf8.find(integer, rev1kSep, 1, 2) if thousandsSeparator and thousandsSeparator == 1 then - integer = integer:sub(2) + integer = utf8.sub(integer, 2) end end else - integer = integer:reverse():gsub("(%d%d%d)", "%1"):reverse() + integer = utf8.reverse(utf8.gsub(utf8.reverse(integer), "(%d%d%d)", "%1")) end - return colour..minus..integer..fraction:gsub("%.", main.decimalSeparator) + return colour..minus..integer..utf8.gsub(fraction, "%.", main.decimalSeparator) end) end