From ceea2378f6384454611bde05501c0782df0df4b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6ran=20Karl?= <3951388+JoeKar@users.noreply.github.com> Date: Mon, 5 Feb 2024 20:47:45 +0100 Subject: [PATCH] buffer: Build the lines with runes --- internal/buffer/line_array.go | 9 ++++++- internal/util/unicode.go | 48 +++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 1 deletion(-) diff --git a/internal/buffer/line_array.go b/internal/buffer/line_array.go index eecb6b59..7a4fc05b 100644 --- a/internal/buffer/line_array.go +++ b/internal/buffer/line_array.go @@ -44,7 +44,8 @@ type searchState struct { // A Line contains the data in bytes as well as a highlight state, match // and a flag for whether the highlighting needs to be updated type Line struct { - data []byte + data []byte + runes []rune state highlight.State match highlight.LineMatch @@ -146,8 +147,10 @@ func NewLineArray(size uint64, endings FileFormat, reader io.Reader) *LineArray if err != nil { if err == io.EOF { + runes, _ := util.DecodeCharacters(data) la.lines = Append(la.lines, Line{ data: data, + runes: runes, state: nil, match: nil, }) @@ -155,8 +158,10 @@ func NewLineArray(size uint64, endings FileFormat, reader io.Reader) *LineArray // Last line was read break } else { + runes, _ := util.DecodeCharacters(data[:dlen-1]) la.lines = Append(la.lines, Line{ data: data[:dlen-1], + runes: runes, state: nil, match: nil, }) @@ -189,12 +194,14 @@ func (la *LineArray) Bytes() []byte { func (la *LineArray) newlineBelow(y int) { la.lines = append(la.lines, Line{ data: []byte{' '}, + runes: []rune{}, state: nil, match: nil, }) copy(la.lines[y+2:], la.lines[y+1:]) la.lines[y+1] = Line{ data: []byte{}, + runes: []rune{}, state: la.lines[y].state, match: nil, } diff --git a/internal/util/unicode.go b/internal/util/unicode.go index 14243e68..9c05cdfd 100644 --- a/internal/util/unicode.go +++ b/internal/util/unicode.go @@ -64,6 +64,54 @@ func DecodeCharacterInString(str string) (rune, []rune, int) { 
// DecodeCharacters decodes every UTF-8 encoded rune in b and returns the
// runes in their original order together with the number of bytes consumed.
//
// Combining marks need no special treatment: each rune is emitted in input
// order, so a base rune and the marks that follow it stay adjacent in the
// result. Every invalid byte is decoded as utf8.RuneError with a width of
// one byte, which means the returned size always equals len(b). An empty
// input yields a nil slice and a size of zero.
func DecodeCharacters(b []byte) ([]rune, int) {
	if len(b) == 0 {
		return nil, 0
	}

	// Count first so the slice is allocated once at its exact final length
	// instead of growing repeatedly while appending.
	runes := make([]rune, 0, utf8.RuneCount(b))
	size := 0
	for size < len(b) {
		r, s := utf8.DecodeRune(b[size:])
		runes = append(runes, r)
		size += s
	}

	return runes, size
}

// DecodeCharactersInString decodes every UTF-8 encoded rune in str and
// returns the runes in their original order together with the number of
// bytes consumed.
//
// It is the string counterpart of DecodeCharacters: combining marks stay
// directly behind the rune they follow, every invalid byte becomes
// utf8.RuneError, and the returned size always equals len(str). An empty
// input yields a nil slice and a size of zero.
func DecodeCharactersInString(str string) ([]rune, int) {
	if len(str) == 0 {
		return nil, 0
	}

	// Count first so the slice is allocated once at its exact final length.
	runes := make([]rune, 0, utf8.RuneCountInString(str))
	// Ranging over a string decodes one rune per iteration and yields
	// utf8.RuneError for each invalid byte, advancing a single byte.
	for _, r := range str {
		runes = append(runes, r)
	}

	return runes, len(str)
}