mirror of
https://github.com/zyedidia/micro.git
synced 2026-03-28 22:08:12 +09:00
Use DecodeCharacter over DecodeRune
This commit is contained in:
68
internal/util/unicode.go
Normal file
68
internal/util/unicode.go
Normal file
@@ -0,0 +1,68 @@
|
||||
package util
|
||||
|
||||
import (
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// combining is the range table of code points that this package treats as
// combining characters, i.e. runes that attach to the preceding base rune
// instead of forming a character of their own. Every range uses stride 1,
// so each code point between the low and high bound (inclusive) is a member.
var combining = &unicode.RangeTable{
	R16: []unicode.Range16{
		{0x0300, 0x036f, 1}, // combining diacritical marks
		{0x1ab0, 0x1aff, 1}, // combining diacritical marks extended
		{0x1dc0, 0x1dff, 1}, // combining diacritical marks supplement
		{0x20d0, 0x20ff, 1}, // combining diacritical marks for symbols
		{0xfe20, 0xfe2f, 1}, // combining half marks
	},
}
|
||||
|
||||
|
||||
// DecodeCharacter returns the next character from an array of bytes
|
||||
// A character is a rune along with any accompanying combining runes
|
||||
func DecodeCharacter(b []byte) (rune, []rune, int) {
|
||||
r, size := utf8.DecodeRune(b)
|
||||
b = b[size:]
|
||||
c, s := utf8.DecodeRune(b)
|
||||
|
||||
var combc []rune
|
||||
for unicode.In(c, combining) {
|
||||
combc = append(combc, c)
|
||||
size += s
|
||||
|
||||
b = b[s:]
|
||||
c, s = utf8.DecodeRune(b)
|
||||
}
|
||||
|
||||
return r, combc, size
|
||||
}
|
||||
|
||||
// CharacterCount returns the number of characters in a byte array
|
||||
// Similar to utf8.RuneCount but for unicode characters
|
||||
func CharacterCount(b []byte) int {
|
||||
s := 0
|
||||
|
||||
for len(b) > 0 {
|
||||
r, size := utf8.DecodeRune(b)
|
||||
if !unicode.In(r, combining) {
|
||||
s++
|
||||
}
|
||||
|
||||
b = b[size:]
|
||||
}
|
||||
|
||||
return s
|
||||
}
|
||||
|
||||
// CharacterCount returns the number of characters in a string
|
||||
// Similar to utf8.RuneCountInString but for unicode characters
|
||||
func CharacterCountInString(str string) int {
|
||||
s := 0
|
||||
|
||||
for _, r := range str {
|
||||
if !unicode.In(r, combining) {
|
||||
s++
|
||||
}
|
||||
}
|
||||
|
||||
return s
|
||||
}
|
||||
@@ -64,7 +64,7 @@ func SliceEnd(slc []byte, index int) []byte {
|
||||
return slc[totalSize:]
|
||||
}
|
||||
|
||||
_, size := utf8.DecodeRune(slc[totalSize:])
|
||||
_, _, size := DecodeCharacter(slc[totalSize:])
|
||||
totalSize += size
|
||||
i++
|
||||
}
|
||||
@@ -101,7 +101,7 @@ func SliceStart(slc []byte, index int) []byte {
|
||||
return slc[:totalSize]
|
||||
}
|
||||
|
||||
_, size := utf8.DecodeRune(slc[totalSize:])
|
||||
_, _, size := DecodeCharacter(slc[totalSize:])
|
||||
totalSize += size
|
||||
i++
|
||||
}
|
||||
@@ -135,7 +135,7 @@ func SliceVisualEnd(b []byte, n, tabsize int) ([]byte, int, int) {
|
||||
width := 0
|
||||
i := 0
|
||||
for len(b) > 0 {
|
||||
r, size := utf8.DecodeRune(b)
|
||||
r, _, size := DecodeCharacter(b)
|
||||
|
||||
w := 0
|
||||
switch r {
|
||||
@@ -172,7 +172,7 @@ func StringWidth(b []byte, n, tabsize int) int {
|
||||
i := 0
|
||||
width := 0
|
||||
for len(b) > 0 {
|
||||
r, size := utf8.DecodeRune(b)
|
||||
r, _, size := DecodeCharacter(b)
|
||||
b = b[size:]
|
||||
|
||||
switch r {
|
||||
@@ -265,7 +265,7 @@ func IsBytesWhitespace(b []byte) bool {
|
||||
// RunePos returns the rune index of a given byte index
|
||||
// Make sure the byte index is not between code points
|
||||
func RunePos(b []byte, i int) int {
|
||||
return utf8.RuneCount(b[:i])
|
||||
return CharacterCount(b[:i])
|
||||
}
|
||||
|
||||
// MakeRelative will attempt to make a relative path between path and base
|
||||
@@ -344,7 +344,7 @@ func EscapePath(path string) string {
|
||||
func GetLeadingWhitespace(b []byte) []byte {
|
||||
ws := []byte{}
|
||||
for len(b) > 0 {
|
||||
r, size := utf8.DecodeRune(b)
|
||||
r, _, size := DecodeCharacter(b)
|
||||
if r == ' ' || r == '\t' {
|
||||
ws = append(ws, byte(r))
|
||||
} else {
|
||||
@@ -370,7 +370,7 @@ func GetCharPosInLine(b []byte, visualPos int, tabsize int) int {
|
||||
i := 0 // char pos
|
||||
width := 0 // string visual width
|
||||
for len(b) > 0 {
|
||||
r, size := utf8.DecodeRune(b)
|
||||
r, _, size := DecodeCharacter(b)
|
||||
b = b[size:]
|
||||
|
||||
switch r {
|
||||
|
||||
Reference in New Issue
Block a user