Use DecodeCharacter over DecodeRune

This commit is contained in:
Zachary Yedidia
2020-05-20 16:43:12 -04:00
parent 65be5efd83
commit bdff221870
9 changed files with 106 additions and 33 deletions

68
internal/util/unicode.go Normal file
View File

@@ -0,0 +1,68 @@
package util
import (
"unicode"
"unicode/utf8"
)
// combining is the range table used by this file to decide whether a rune is
// a combining mark, i.e. a rune that belongs to the character started by the
// rune before it. Only the five Unicode blocks listed below are covered; all
// of them lie in the 16-bit range, so R32 is left empty.
var combining = &unicode.RangeTable{
	R16: []unicode.Range16{
		{Lo: 0x0300, Hi: 0x036f, Stride: 1}, // Combining Diacritical Marks
		{Lo: 0x1ab0, Hi: 0x1aff, Stride: 1}, // Combining Diacritical Marks Extended
		{Lo: 0x1dc0, Hi: 0x1dff, Stride: 1}, // Combining Diacritical Marks Supplement
		{Lo: 0x20d0, Hi: 0x20ff, Stride: 1}, // Combining Diacritical Marks for Symbols
		{Lo: 0xfe20, Hi: 0xfe2f, Stride: 1}, // Combining Half Marks
	},
}
// DecodeCharacter decodes the first character found at the start of b.
// A character is one rune plus every combining rune (as defined by the
// combining table) that immediately follows it.
//
// It returns the leading rune, the combining runes that follow it (nil when
// there are none), and the total number of bytes the character occupies.
// The leading rune is taken as-is, even if it is itself a combining rune.
// For an empty b the result is (utf8.RuneError, nil, 0), mirroring
// utf8.DecodeRune.
func DecodeCharacter(b []byte) (rune, []rune, int) {
	base, total := utf8.DecodeRune(b)

	var marks []rune
	for {
		// Peek at the rune that follows everything consumed so far. At the
		// end of input this yields utf8.RuneError, which is not in the
		// combining table, so the loop always terminates.
		next, width := utf8.DecodeRune(b[total:])
		if !unicode.In(next, combining) {
			break
		}
		marks = append(marks, next)
		total += width
	}

	return base, marks, total
}
// CharacterCount returns the number of characters in b. It is the
// character-level counterpart of utf8.RuneCount: every rune is counted
// except those in the combining table. A combining rune is never counted,
// even when it is the first rune of b, so the count of a concatenation is
// the sum of the counts of its parts.
func CharacterCount(b []byte) int {
	count := 0
	for off := 0; off < len(b); {
		r, width := utf8.DecodeRune(b[off:])
		off += width
		if unicode.In(r, combining) {
			// Combining runes extend the preceding character.
			continue
		}
		count++
	}
	return count
}
// CharacterCountInString returns the number of characters in str. It is the
// character-level counterpart of utf8.RuneCountInString: every rune is
// counted except those in the combining table. A combining rune is never
// counted, even when it is the first rune of str.
func CharacterCountInString(str string) int {
	count := 0
	for _, r := range str {
		if unicode.In(r, combining) {
			// Combining runes extend the preceding character.
			continue
		}
		count++
	}
	return count
}

View File

@@ -64,7 +64,7 @@ func SliceEnd(slc []byte, index int) []byte {
return slc[totalSize:]
}
_, size := utf8.DecodeRune(slc[totalSize:])
_, _, size := DecodeCharacter(slc[totalSize:])
totalSize += size
i++
}
@@ -101,7 +101,7 @@ func SliceStart(slc []byte, index int) []byte {
return slc[:totalSize]
}
_, size := utf8.DecodeRune(slc[totalSize:])
_, _, size := DecodeCharacter(slc[totalSize:])
totalSize += size
i++
}
@@ -135,7 +135,7 @@ func SliceVisualEnd(b []byte, n, tabsize int) ([]byte, int, int) {
width := 0
i := 0
for len(b) > 0 {
r, size := utf8.DecodeRune(b)
r, _, size := DecodeCharacter(b)
w := 0
switch r {
@@ -172,7 +172,7 @@ func StringWidth(b []byte, n, tabsize int) int {
i := 0
width := 0
for len(b) > 0 {
r, size := utf8.DecodeRune(b)
r, _, size := DecodeCharacter(b)
b = b[size:]
switch r {
@@ -265,7 +265,7 @@ func IsBytesWhitespace(b []byte) bool {
// RunePos returns the rune index of a given byte index
// Make sure the byte index is not between code points
func RunePos(b []byte, i int) int {
return utf8.RuneCount(b[:i])
return CharacterCount(b[:i])
}
// MakeRelative will attempt to make a relative path between path and base
@@ -344,7 +344,7 @@ func EscapePath(path string) string {
func GetLeadingWhitespace(b []byte) []byte {
ws := []byte{}
for len(b) > 0 {
r, size := utf8.DecodeRune(b)
r, _, size := DecodeCharacter(b)
if r == ' ' || r == '\t' {
ws = append(ws, byte(r))
} else {
@@ -370,7 +370,7 @@ func GetCharPosInLine(b []byte, visualPos int, tabsize int) int {
i := 0 // char pos
width := 0 // string visual width
for len(b) > 0 {
r, size := utf8.DecodeRune(b)
r, _, size := DecodeCharacter(b)
b = b[size:]
switch r {