From bdff22187039aca1cfae8d09ba329cfa6d99a6a8 Mon Sep 17 00:00:00 2001 From: Zachary Yedidia Date: Wed, 20 May 2020 16:43:12 -0400 Subject: [PATCH] Use DecodeCharacter over DecodeRune --- internal/buffer/buffer.go | 2 +- internal/buffer/cursor.go | 2 +- internal/buffer/line_array.go | 3 +- internal/display/bufwindow.go | 24 ++++++------ internal/display/infowindow.go | 11 +++--- internal/display/statusline.go | 10 +++-- internal/display/termwindow.go | 5 ++- internal/util/unicode.go | 68 ++++++++++++++++++++++++++++++++++ internal/util/util.go | 14 +++---- 9 files changed, 106 insertions(+), 33 deletions(-) create mode 100644 internal/util/unicode.go diff --git a/internal/buffer/buffer.go b/internal/buffer/buffer.go index 5525b1b7..ab9ada06 100644 --- a/internal/buffer/buffer.go +++ b/internal/buffer/buffer.go @@ -474,7 +474,7 @@ func (b *Buffer) RuneAt(loc Loc) rune { if len(line) > 0 { i := 0 for len(line) > 0 { - r, size := utf8.DecodeRune(line) + r, _, size := util.DecodeCharacter(line) line = line[size:] i++ diff --git a/internal/buffer/cursor.go b/internal/buffer/cursor.go index 0d1b9d84..4bd94808 100644 --- a/internal/buffer/cursor.go +++ b/internal/buffer/cursor.go @@ -436,7 +436,7 @@ func (c *Cursor) RuneUnder(x int) rune { } i := 0 for len(line) > 0 { - r, size := utf8.DecodeRune(line) + r, _, size := util.DecodeCharacter(line) line = line[size:] if i == x { diff --git a/internal/buffer/line_array.go b/internal/buffer/line_array.go index 48ee1d15..c47a3948 100644 --- a/internal/buffer/line_array.go +++ b/internal/buffer/line_array.go @@ -8,6 +8,7 @@ import ( "unicode/utf8" "github.com/zyedidia/micro/v2/pkg/highlight" + "github.com/zyedidia/micro/v2/internal/util" ) // Finds the byte index of the nth rune in a byte slice @@ -19,7 +20,7 @@ func runeToByteIndex(n int, txt []byte) int { count := 0 i := 0 for len(txt) > 0 { - _, size := utf8.DecodeRune(txt) + _, _, size := util.DecodeCharacter(txt) txt = txt[size:] count += size diff --git 
a/internal/display/bufwindow.go b/internal/display/bufwindow.go index fe7b8631..87bea8d4 100644 --- a/internal/display/bufwindow.go +++ b/internal/display/bufwindow.go @@ -2,7 +2,6 @@ package display import ( "strconv" - "unicode/utf8" runewidth "github.com/mattn/go-runewidth" "github.com/zyedidia/micro/v2/internal/buffer" @@ -73,9 +72,9 @@ func (w *BufWindow) getStartInfo(n, lineN int) ([]byte, int, int, *tcell.Style) curStyle := config.DefStyle var s *tcell.Style for len(b) > 0 { - r, size := utf8.DecodeRune(b) + r, _, size := util.DecodeCharacter(b) - curStyle, found := w.getStyle(curStyle, bloc, r) + curStyle, found := w.getStyle(curStyle, bloc) if found { s = &curStyle } @@ -237,7 +236,7 @@ func (w *BufWindow) LocFromVisual(svloc buffer.Loc) buffer.Loc { return bloc } - r, size := utf8.DecodeRune(line) + r, _, size := util.DecodeCharacter(line) draw() width := 0 @@ -360,7 +359,7 @@ func (w *BufWindow) drawLineNum(lineNumStyle tcell.Style, softwrapped bool, maxL // getStyle returns the highlight style for the given character position // If there is no change to the current highlight style it just returns that -func (w *BufWindow) getStyle(style tcell.Style, bloc buffer.Loc, r rune) (tcell.Style, bool) { +func (w *BufWindow) getStyle(style tcell.Style, bloc buffer.Loc) (tcell.Style, bool) { if group, ok := w.Buf.Match(bloc.Y)[bloc.X]; ok { s := config.GetColor(group.String()) return s, true @@ -510,7 +509,7 @@ func (w *BufWindow) displayBuffer() { } bloc.X = bslice - draw := func(r rune, style tcell.Style, showcursor bool) { + draw := func(r rune, combc []rune, style tcell.Style, showcursor bool) { if nColsBeforeStart <= 0 { for _, c := range cursors { if c.HasSelection() && @@ -568,7 +567,7 @@ func (w *BufWindow) displayBuffer() { } } - screen.SetContent(w.X+vloc.X, w.Y+vloc.Y, r, nil, style) + screen.SetContent(w.X+vloc.X, w.Y+vloc.Y, r, combc, style) if showcursor { for _, c := range cursors { @@ -584,10 +583,11 @@ func (w *BufWindow) displayBuffer() { 
totalwidth := w.StartCol - nColsBeforeStart for len(line) > 0 { - r, size := utf8.DecodeRune(line) - curStyle, _ = w.getStyle(curStyle, bloc, r) + r, combc, size := util.DecodeCharacter(line) - draw(r, curStyle, true) + curStyle, _ = w.getStyle(curStyle, bloc) + + draw(r, combc, curStyle, true) width := 0 @@ -604,7 +604,7 @@ func (w *BufWindow) displayBuffer() { // Draw any extra characters either spaces for tabs or @ for incomplete wide runes if width > 1 { for i := 1; i < width; i++ { - draw(char, curStyle, false) + draw(char, nil, curStyle, false) } } bloc.X++ @@ -659,7 +659,7 @@ func (w *BufWindow) displayBuffer() { } if vloc.X != bufWidth { - draw(' ', curStyle, true) + draw(' ', nil, curStyle, true) } bloc.X = w.StartCol diff --git a/internal/display/infowindow.go b/internal/display/infowindow.go index e3c09670..c3e016f7 100644 --- a/internal/display/infowindow.go +++ b/internal/display/infowindow.go @@ -92,7 +92,7 @@ func (i *InfoWindow) displayBuffer() { line, nColsBeforeStart, bslice := util.SliceVisualEnd(line, blocX, tabsize) blocX = bslice - draw := func(r rune, style tcell.Style) { + draw := func(r rune, combc []rune, style tcell.Style) { if nColsBeforeStart <= 0 { bloc := buffer.Loc{X: blocX, Y: 0} if activeC.HasSelection() && @@ -112,8 +112,9 @@ func (i *InfoWindow) displayBuffer() { c := r if j > 0 { c = ' ' + combc = nil } - screen.SetContent(vlocX, i.Y, c, nil, style) + screen.SetContent(vlocX, i.Y, c, combc, style) } vlocX++ } @@ -124,9 +125,9 @@ func (i *InfoWindow) displayBuffer() { for len(line) > 0 { curVX := vlocX curBX := blocX - r, size := utf8.DecodeRune(line) + r, combc, size := util.DecodeCharacter(line) - draw(r, i.defStyle()) + draw(r, combc, i.defStyle()) width := 0 @@ -146,7 +147,7 @@ func (i *InfoWindow) displayBuffer() { // Draw any extra characters either spaces for tabs or @ for incomplete wide runes if width > 1 { for j := 1; j < width; j++ { - draw(char, i.defStyle()) + draw(char, nil, i.defStyle()) } } if activeC.X == curBX { 
diff --git a/internal/display/statusline.go b/internal/display/statusline.go index 19ab77f6..63ac9c99 100644 --- a/internal/display/statusline.go +++ b/internal/display/statusline.go @@ -174,28 +174,30 @@ func (s *StatusLine) Display() { winX := s.win.X for x := 0; x < s.win.Width; x++ { if x < leftLen { - r, size := utf8.DecodeRune(leftText) + r, combc, size := util.DecodeCharacter(leftText) leftText = leftText[size:] rw := runewidth.RuneWidth(r) for j := 0; j < rw; j++ { c := r if j > 0 { c = ' ' + combc = nil x++ } - screen.SetContent(winX+x, y, c, nil, statusLineStyle) + screen.SetContent(winX+x, y, c, combc, statusLineStyle) } } else if x >= s.win.Width-rightLen && x < rightLen+s.win.Width-rightLen { - r, size := utf8.DecodeRune(rightText) + r, combc, size := util.DecodeCharacter(rightText) rightText = rightText[size:] rw := runewidth.RuneWidth(r) for j := 0; j < rw; j++ { c := r if j > 0 { c = ' ' + combc = nil x++ } - screen.SetContent(winX+x, y, c, nil, statusLineStyle) + screen.SetContent(winX+x, y, c, combc, statusLineStyle) } } else { screen.SetContent(winX+x, y, ' ', nil, statusLineStyle) diff --git a/internal/display/termwindow.go b/internal/display/termwindow.go index 256cf540..ec161bcd 100644 --- a/internal/display/termwindow.go +++ b/internal/display/termwindow.go @@ -7,6 +7,7 @@ import ( "github.com/zyedidia/micro/v2/internal/config" "github.com/zyedidia/micro/v2/internal/screen" "github.com/zyedidia/micro/v2/internal/shell" + "github.com/zyedidia/micro/v2/internal/util" "github.com/zyedidia/tcell" "github.com/zyedidia/terminal" ) @@ -101,9 +102,9 @@ func (w *TermWindow) Display() { textLen := utf8.RuneCount(text) for x := 0; x < w.Width; x++ { if x < textLen { - r, size := utf8.DecodeRune(text) + r, combc, size := util.DecodeCharacter(text) text = text[size:] - screen.SetContent(w.X+x, w.Y+w.Height, r, nil, statusLineStyle) + screen.SetContent(w.X+x, w.Y+w.Height, r, combc, statusLineStyle) } else { screen.SetContent(w.X+x, w.Y+w.Height, ' ', nil, 
statusLineStyle) } diff --git a/internal/util/unicode.go b/internal/util/unicode.go new file mode 100644 index 00000000..98a17c42 --- /dev/null +++ b/internal/util/unicode.go @@ -0,0 +1,68 @@ +package util + +import ( + "unicode" + "unicode/utf8" +) + +// combining character range table +var combining = &unicode.RangeTable{ + R16: []unicode.Range16{ + {0x0300, 0x036f, 1}, // combining diacritical marks + {0x1ab0, 0x1aff, 1}, // combining diacritical marks extended + {0x1dc0, 0x1dff, 1}, // combining diacritical marks supplement + {0x20d0, 0x20ff, 1}, // combining diacritical marks for symbols + {0xfe20, 0xfe2f, 1}, // combining half marks + }, +} + + +// DecodeCharacter returns the next character from an array of bytes +// A character is a rune along with any accompanying combining runes +func DecodeCharacter(b []byte) (rune, []rune, int) { + r, size := utf8.DecodeRune(b) + b = b[size:] + c, s := utf8.DecodeRune(b) + + var combc []rune + for unicode.In(c, combining) { + combc = append(combc, c) + size += s + + b = b[s:] + c, s = utf8.DecodeRune(b) + } + + return r, combc, size +} + +// CharacterCount returns the number of characters in a byte array +// Similar to utf8.RuneCount but for unicode characters +func CharacterCount(b []byte) int { + s := 0 + + for len(b) > 0 { + r, size := utf8.DecodeRune(b) + if !unicode.In(r, combining) { + s++ + } + + b = b[size:] + } + + return s +} + +// CharacterCountInString returns the number of characters in a string +// Similar to utf8.RuneCountInString but for unicode characters +func CharacterCountInString(str string) int { + s := 0 + + for _, r := range str { + if !unicode.In(r, combining) { + s++ + } + } + + return s +} diff --git a/internal/util/util.go b/internal/util/util.go index 841363ce..44a9a12f 100644 --- a/internal/util/util.go +++ b/internal/util/util.go @@ -64,7 +64,7 @@ func SliceEnd(slc []byte, index int) []byte { return slc[totalSize:] } - _, size := utf8.DecodeRune(slc[totalSize:]) + _, _, size :=
DecodeCharacter(slc[totalSize:]) totalSize += size i++ } @@ -101,7 +101,7 @@ func SliceStart(slc []byte, index int) []byte { return slc[:totalSize] } - _, size := utf8.DecodeRune(slc[totalSize:]) + _, _, size := DecodeCharacter(slc[totalSize:]) totalSize += size i++ } @@ -135,7 +135,7 @@ func SliceVisualEnd(b []byte, n, tabsize int) ([]byte, int, int) { width := 0 i := 0 for len(b) > 0 { - r, size := utf8.DecodeRune(b) + r, _, size := DecodeCharacter(b) w := 0 switch r { @@ -172,7 +172,7 @@ func StringWidth(b []byte, n, tabsize int) int { i := 0 width := 0 for len(b) > 0 { - r, size := utf8.DecodeRune(b) + r, _, size := DecodeCharacter(b) b = b[size:] switch r { @@ -265,7 +265,7 @@ func IsBytesWhitespace(b []byte) bool { // RunePos returns the rune index of a given byte index // Make sure the byte index is not between code points func RunePos(b []byte, i int) int { - return utf8.RuneCount(b[:i]) + return CharacterCount(b[:i]) } // MakeRelative will attempt to make a relative path between path and base @@ -344,7 +344,7 @@ func EscapePath(path string) string { func GetLeadingWhitespace(b []byte) []byte { ws := []byte{} for len(b) > 0 { - r, size := utf8.DecodeRune(b) + r, _, size := DecodeCharacter(b) if r == ' ' || r == '\t' { ws = append(ws, byte(r)) } else { @@ -370,7 +370,7 @@ func GetCharPosInLine(b []byte, visualPos int, tabsize int) int { i := 0 // char pos width := 0 // string visual width for len(b) > 0 { - r, size := utf8.DecodeRune(b) + r, _, size := DecodeCharacter(b) b = b[size:] switch r {