Skip to content

Commit 0b74213

Browse files
authored
fix(bug): correct multi-byte character handling in get_visual_selection (#126)
1 parent 6507297 commit 0b74213

2 files changed

Lines changed: 153 additions & 4 deletions

File tree

lua/markdown-plus/utils.lua

Lines changed: 47 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,34 @@ function M.debug_print(...)
105105
end
106106
end
107107

108+
---Get the byte index of the last byte of the character covering a byte column.
---
---vim.fn.getpos() reports a multi-byte character's column as the byte index
---of its FIRST byte, whereas inclusive text extraction needs the index of its
---LAST byte. This function extends the column forward over any UTF-8
---continuation bytes (0x80-0xBF) that belong to the same character.
---
---Edge cases:
---  * a column at or past the end of the line is clamped to the line length
---    (so an empty line always yields 0);
---  * a column below 1 addresses no character (for example the 0 that
---    getpos() reports for an unset mark) and is returned unchanged instead
---    of raising an error;
---  * a column that points into the middle of a multi-byte character resolves
---    to the last byte of that same character.
---@param line string The line content
---@param byte_col number The 1-indexed byte column from getpos()
---@return number The 1-indexed byte column of the last byte of the character
function M.get_char_end_byte(line, byte_col)
  local line_len = #line

  -- At or beyond the final byte there is nothing left to extend over.
  if byte_col >= line_len then
    return line_len
  end

  -- No character lives at columns below 1; leave the value untouched.
  if byte_col < 1 then
    return byte_col
  end

  -- Walk forward while the following byte is a UTF-8 continuation byte.
  -- Any other byte starts a new character, so the current one ends here.
  local last_byte = byte_col
  while last_byte < line_len do
    local following = line:byte(last_byte + 1)
    if following < 0x80 or following > 0xBF then
      break
    end
    last_byte = last_byte + 1
  end

  return last_byte
end
135+
108136
---Get visual selection range
109137
---@param include_col? boolean Whether to include column info (default: true)
110138
---@return {start_row: number, end_row: number, start_col?: number, end_col?: number}
@@ -134,6 +162,13 @@ function M.get_visual_selection(include_col)
134162
start_col = 1
135163
local end_line = vim.api.nvim_buf_get_lines(0, end_row - 1, end_row, false)[1] or ""
136164
end_col = #end_line
165+
else
166+
-- For character-wise (v) and block-wise (<C-v>) visual modes,
167+
-- adjust end_col to handle multi-byte characters
168+
-- getpos() returns the byte position of the first byte of a multi-byte character
169+
-- We need the byte position of the last byte for proper text extraction
170+
local end_line = vim.api.nvim_buf_get_lines(0, end_row - 1, end_row, false)[1] or ""
171+
end_col = M.get_char_end_byte(end_line, end_col)
137172
end
138173

139174
if include_col then
@@ -154,17 +189,25 @@ function M.get_visual_selection(include_col)
154189
local start_pos = vim.fn.getpos("'<")
155190
local end_pos = vim.fn.getpos("'>")
156191

192+
local start_col = start_pos[3]
193+
local end_col = end_pos[3]
194+
195+
-- Adjust end_col for multi-byte characters in previous visual selection
196+
local end_row = end_pos[2]
197+
local end_line = vim.api.nvim_buf_get_lines(0, end_row - 1, end_row, false)[1] or ""
198+
end_col = M.get_char_end_byte(end_line, end_col)
199+
157200
if include_col then
158201
return {
159202
start_row = start_pos[2],
160-
start_col = start_pos[3],
161-
end_row = end_pos[2],
162-
end_col = end_pos[3],
203+
start_col = start_col,
204+
end_row = end_row,
205+
end_col = end_col,
163206
}
164207
else
165208
return {
166209
start_row = start_pos[2],
167-
end_row = end_pos[2],
210+
end_row = end_row,
168211
}
169212
end
170213
end

spec/markdown-plus/utils_spec.lua

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,4 +102,110 @@ describe("markdown-plus utils", function()
102102
vim.api.nvim_buf_delete(buf, { force = true })
103103
end)
104104
end)
105+
106+
describe("get_char_end_byte", function()
  ---Check a list of { byte_col, expected_end_byte } pairs against one line.
  ---Pairs are evaluated in order; the first mismatch fails the test.
  local function expect_end_bytes(line, pairs_to_check)
    for _, pair in ipairs(pairs_to_check) do
      local byte_col, expected = pair[1], pair[2]
      assert.are.equal(expected, utils.get_char_end_byte(line, byte_col))
    end
  end

  it("returns correct byte index for ASCII characters", function()
    -- Single-byte characters: the end byte is the column itself.
    expect_end_bytes("hello world", {
      { 1, 1 }, -- 'h'
      { 6, 6 }, -- ' '
      { 11, 11 }, -- 'd'
    })
  end)

  it("returns correct byte index for multi-byte characters", function()
    -- Every character in this line occupies 3 bytes in UTF-8.
    expect_end_bytes("这是一段文本", {
      { 1, 3 }, -- 这 spans bytes 1-3
      { 4, 6 }, -- 是 spans bytes 4-6
      { 7, 9 }, -- 一 spans bytes 7-9
      { 10, 12 }, -- 段 spans bytes 10-12
      { 13, 15 }, -- 文 spans bytes 13-15
      { 16, 18 }, -- 本 spans bytes 16-18 (final character)
    })
  end)

  it("returns correct byte index for mixed ASCII and multi-byte", function()
    -- "hello " takes 6 bytes, then 世 and 界 take 3 bytes each.
    expect_end_bytes("hello 世界", {
      { 1, 1 }, -- 'h'
      { 6, 6 }, -- ' '
      { 7, 9 }, -- '世' spans bytes 7-9
      { 10, 12 }, -- '界' spans bytes 10-12
    })
  end)

  it("handles byte position beyond line length", function()
    -- A column past the end is clamped to the length of the line.
    expect_end_bytes("test", { { 100, 4 } })
  end)
end)
144+
145+
describe("get_visual_selection with multi-byte characters", function()
  ---Create a scratch buffer holding one line, make it current, and put the
  ---cursor on the first column. Returns the buffer handle for later cleanup.
  local function open_single_line(content)
    local bufnr = vim.api.nvim_create_buf(false, true)
    vim.api.nvim_buf_set_lines(bufnr, 0, -1, false, { content })
    vim.api.nvim_set_current_buf(bufnr)
    vim.api.nvim_win_set_cursor(0, { 1, 0 })
    return bufnr
  end

  ---Extract the text covered by a selection table via get_text_in_range.
  local function text_of(sel)
    return utils.get_text_in_range(sel.start_row, sel.start_col, sel.end_row, sel.end_col)
  end

  it("handles visual selection of multi-byte characters", function()
    local bufnr = open_single_line("这是一段文本")

    -- Character-wise selection from the first column to the end of the line
    vim.cmd("normal! v$")

    local sel = utils.get_visual_selection()

    -- The selection must span the whole line
    assert.are.equal(1, sel.start_row)
    assert.are.equal(1, sel.end_row)
    assert.are.equal(1, sel.start_col)
    assert.are.equal(18, sel.end_col) -- the line is 18 bytes long

    -- Extraction with those coordinates returns the full text
    assert.are.equal("这是一段文本", text_of(sel))

    vim.api.nvim_buf_delete(bufnr, { force = true })
  end)

  it("handles partial selection of multi-byte characters", function()
    local bufnr = open_single_line("这是一段文本")

    -- Select the first two characters (这是)
    vim.cmd("normal! v")
    vim.cmd("normal! l") -- extend onto the second character

    local sel = utils.get_visual_selection()

    -- Byte positions must cover both characters completely
    assert.are.equal(1, sel.start_col)
    assert.are.equal(6, sel.end_col) -- last byte of the second character (是)

    assert.are.equal("这是", text_of(sel))

    vim.api.nvim_buf_delete(bufnr, { force = true })
  end)

  it("handles mixed ASCII and multi-byte selection", function()
    local bufnr = open_single_line("hello 世界")

    -- Character-wise selection of the entire line
    vim.cmd("normal! v$")

    local sel = utils.get_visual_selection()
    assert.are.equal("hello 世界", text_of(sel))

    vim.api.nvim_buf_delete(bufnr, { force = true })
  end)
end)
105211
end)

0 commit comments

Comments
 (0)