Skip to content

Commit 7fc2f32

Browse files
authored
fix(multibyte): handle utf-8 multibyte characters in text manipulation operations (#146)
1 parent 40b9c9a commit 7fc2f32

7 files changed

Lines changed: 279 additions & 34 deletions

File tree

lua/markdown-plus/footnotes/insertion.lua

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,10 @@ function M.insert_footnote()
123123
-- Insert reference after the character the cursor is on
124124
local line = vim.api.nvim_buf_get_lines(bufnr, row - 1, row, false)[1]
125125
local reference = "[^" .. id .. "]"
126-
local new_line = line:sub(1, col + 1) .. reference .. line:sub(col + 2)
126+
127+
-- Use UTF-8 safe split to handle multibyte characters correctly
128+
local before, after = utils.split_after_cursor(line, col)
129+
local new_line = before .. reference .. after
127130
vim.api.nvim_buf_set_lines(bufnr, row - 1, row, false, { new_line })
128131

129132
-- If definition already exists, just insert the reference and notify

lua/markdown-plus/format/init.lua

Lines changed: 43 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,7 @@ end
280280
function M.get_word_boundaries()
281281
local cursor = utils.get_cursor()
282282
local row = cursor[1]
283-
local col = cursor[2]
283+
local col = cursor[2] -- 0-indexed byte offset
284284
local line = utils.get_current_line()
285285

286286
-- Define what characters are considered word boundaries (stop points)
@@ -305,38 +305,62 @@ function M.get_word_boundaries()
305305
return false
306306
end
307307

308-
-- Find word start
309-
local word_start = col
310-
while word_start > 0 do
311-
local char = line:sub(word_start, word_start)
308+
-- Convert byte offset to character index for iteration
309+
local char_idx = vim.fn.charidx(line, col)
310+
if char_idx < 0 then
311+
char_idx = 0
312+
end
313+
314+
-- Get total character count
315+
local total_chars = vim.fn.strcharlen(line)
316+
317+
-- Find word start (iterate backwards by character)
318+
local word_start_char = char_idx
319+
while word_start_char > 0 do
320+
local char = vim.fn.strcharpart(line, word_start_char, 1)
312321
if is_word_boundary(char) then
313-
word_start = word_start + 1
322+
word_start_char = word_start_char + 1
314323
break
315324
end
316-
word_start = word_start - 1
325+
word_start_char = word_start_char - 1
317326
end
318-
if word_start == 0 then
319-
word_start = 1
327+
if word_start_char < 0 then
328+
word_start_char = 0
320329
end
321330

322-
-- Find word end
323-
local word_end = col + 1
324-
while word_end <= #line do
325-
local char = line:sub(word_end, word_end)
331+
-- Find word end (iterate forwards by character)
332+
local word_end_char = char_idx + 1
333+
while word_end_char < total_chars do
334+
local char = vim.fn.strcharpart(line, word_end_char, 1)
326335
if is_word_boundary(char) then
327-
word_end = word_end - 1
336+
word_end_char = word_end_char - 1
328337
break
329338
end
330-
word_end = word_end + 1
339+
word_end_char = word_end_char + 1
340+
end
341+
if word_end_char >= total_chars then
342+
word_end_char = total_chars - 1
331343
end
332-
if word_end > #line then
333-
word_end = #line
344+
if word_end_char < 0 then
345+
word_end_char = 0
346+
end
347+
348+
-- Convert character indices back to byte positions (1-indexed for get_text_in_range)
349+
local start_byte = vim.fn.byteidx(line, word_start_char)
350+
if start_byte == -1 then
351+
start_byte = 0
352+
end
353+
local end_byte = vim.fn.byteidx(line, word_end_char + 1)
354+
if end_byte == -1 then
355+
end_byte = #line
356+
else
357+
end_byte = end_byte - 1
334358
end
335359

336360
return {
337361
row = row,
338-
start_col = word_start,
339-
end_col = word_end,
362+
start_col = start_byte + 1, -- Convert to 1-indexed
363+
end_col = end_byte + 1, -- Convert to 1-indexed
340364
}
341365
end
342366

lua/markdown-plus/images/init.lua

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -189,11 +189,13 @@ function M.insert_image()
189189
local line = utils.get_current_line()
190190
local col = cursor[2]
191191

192-
local new_line = line:sub(1, col) .. image .. line:sub(col + 1)
192+
-- Use UTF-8 safe split to handle multibyte characters correctly
193+
local before, after = utils.split_after_cursor(line, col)
194+
local new_line = before .. image .. after
193195
utils.set_line(cursor[1], new_line)
194196

195197
-- Move cursor after the image
196-
utils.set_cursor(cursor[1], col + #image)
198+
utils.set_cursor(cursor[1], #before + #image)
197199

198200
utils.notify("Image inserted")
199201
end

lua/markdown-plus/links/init.lua

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -175,11 +175,13 @@ function M.insert_link()
175175
local line = utils.get_current_line()
176176
local col = cursor[2]
177177

178-
local new_line = line:sub(1, col) .. link .. line:sub(col + 1)
178+
-- Use UTF-8 safe split to handle multibyte characters correctly
179+
local before, after = utils.split_after_cursor(line, col)
180+
local new_line = before .. link .. after
179181
utils.set_line(cursor[1], new_line)
180182

181183
-- Move cursor after the link
182-
utils.set_cursor(cursor[1], col + #link)
184+
utils.set_cursor(cursor[1], #before + #link)
183185

184186
utils.notify("Link inserted")
185187
end

lua/markdown-plus/list/handlers.lua

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,8 @@ function M.handle_enter()
6565

6666
if not list_info then
6767
-- Not in a list at all, simulate default Enter behavior
68-
local line_before = current_line:sub(1, col)
69-
local line_after = current_line:sub(col + 1)
68+
-- Use UTF-8 safe split to handle multibyte characters correctly
69+
local line_before, line_after = utils.split_at_cursor(current_line, col)
7070

7171
utils.set_line(row, line_before)
7272
utils.insert_line(row + 1, line_after)
@@ -102,8 +102,8 @@ function M.handle_enter()
102102

103103
if should_split then
104104
-- Split content at cursor position
105-
local content_before = current_line:sub(1, col)
106-
local content_after = current_line:sub(col + 1)
105+
-- Use UTF-8 safe split to handle multibyte characters correctly
106+
local content_before, content_after = utils.split_at_cursor(current_line, col)
107107

108108
-- Update current line with content before cursor
109109
utils.set_line(row, content_before)
@@ -142,8 +142,8 @@ function M.continue_list_content()
142142

143143
if not list_info then
144144
-- Not in a list, simulate default Enter behavior
145-
local line_before = current_line:sub(1, col)
146-
local line_after = current_line:sub(col + 1)
145+
-- Use UTF-8 safe split to handle multibyte characters correctly
146+
local line_before, line_after = utils.split_at_cursor(current_line, col)
147147

148148
utils.set_line(row, line_before)
149149
utils.insert_line(row + 1, line_after)
@@ -155,8 +155,8 @@ function M.continue_list_content()
155155
local marker_end = shared.get_content_start_col(list_info)
156156

157157
-- Split line at cursor
158-
local line_before = current_line:sub(1, col)
159-
local line_after = current_line:sub(col + 1)
158+
-- Use UTF-8 safe split to handle multibyte characters correctly
159+
local line_before, line_after = utils.split_at_cursor(current_line, col)
160160

161161
-- Update current line
162162
utils.set_line(row, line_before)
@@ -179,9 +179,11 @@ function M.handle_tab()
179179
local cursor = utils.get_cursor()
180180
local row, col = cursor[1], cursor[2]
181181
local indent = string.rep(" ", vim.bo.shiftwidth or 2)
182-
local new_line = current_line:sub(1, col) .. indent .. current_line:sub(col + 1)
182+
-- Use UTF-8 safe split to handle multibyte characters correctly
183+
local before, after = utils.split_after_cursor(current_line, col)
184+
local new_line = before .. indent .. after
183185
utils.set_line(row, new_line)
184-
utils.set_cursor(row, col + #indent)
186+
utils.set_cursor(row, #before + #indent)
185187
return
186188
end
187189

lua/markdown-plus/utils.lua

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,74 @@ function M.debug_print(...)
105105
end
106106
end
107107

108+
---Divide a line in two immediately BEFORE the character under the cursor.
---The cut is always placed on the first byte of that character, so a cursor
---column that points into the middle of a multibyte UTF-8 sequence never
---tears the character apart. The character under the cursor ends up in the
---second return value. Intended for line-breaking operations such as Enter.
---@param line string The line content
---@param byte_col number 0-indexed byte column (from nvim_win_get_cursor)
---@return string before Text preceding the character under the cursor
---@return string after Text starting with the character under the cursor
function M.split_at_cursor(line, byte_col)
  local line_len = #line

  -- Nothing to split on an empty line
  if line_len == 0 then
    return "", ""
  end

  -- Cursor at or beyond the end: the whole line stays on the left side
  if byte_col >= line_len then
    return line, ""
  end

  -- Map the byte offset to the index of the character containing it, then
  -- map that index back to the byte offset where the character starts
  local char_index = vim.fn.charidx(line, byte_col)
  if char_index >= 0 then
    local split_byte = vim.fn.byteidx(line, char_index)
    if split_byte ~= -1 and split_byte < line_len then
      return string.sub(line, 1, split_byte), string.sub(line, split_byte + 1)
    end
  end

  -- Either conversion failed or landed past the end: treat as end-of-line
  return line, ""
end
141+
142+
---Divide a line in two immediately AFTER the character under the cursor.
---The cut is placed on the first byte of the following character, so the
---whole (possibly multibyte UTF-8) character under the cursor ends up in the
---first return value. Intended for insertions that go after the current
---character (e.g. footnote references, links, images).
---@param line string The line content
---@param byte_col number 0-indexed byte column (from nvim_win_get_cursor)
---@return string before Text up to and including the character under the cursor
---@return string after Text following the character under the cursor
function M.split_after_cursor(line, byte_col)
  local line_len = #line

  -- Nothing to split on an empty line
  if line_len == 0 then
    return "", ""
  end

  -- Cursor at or beyond the end: the whole line stays on the left side
  if byte_col >= line_len then
    return line, ""
  end

  -- Find which character the byte offset belongs to, then ask for the byte
  -- offset at which the NEXT character begins
  local char_index = vim.fn.charidx(line, byte_col)
  if char_index >= 0 then
    local next_start = vim.fn.byteidx(line, char_index + 1)
    if next_start ~= -1 then
      return string.sub(line, 1, next_start), string.sub(line, next_start + 1)
    end
  end

  -- Conversion failed or there is no following character: split at the end
  return line, ""
end
175+
108176
---Get the byte index of the last byte of a multi-byte character
109177
---When vim.fn.getpos() returns a column position for a multi-byte character,
110178
---it returns the byte index of the FIRST byte of that character.

0 commit comments

Comments
 (0)