From 0859f4aa36d69e5cdb92b868dfc08498fd8a1d70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6ran=20Karl?= <3951388+JoeKar@users.noreply.github.com> Date: Tue, 6 Jun 2023 02:39:12 +0200 Subject: [PATCH] Fix: Syntax highlighting for various issues (#2810) * highlighter: Fix region & pattern detection * syntax/sh: Highlight upper case options too * syntax/c(pp): Try to synchronize the rules to lower the maintenance effort * syntax/ruby: Fix explicit filename detection in directories * highlighter: Respect skip rules in regions * syntax/sh: Fix parameter expansion, cond. flags and generalize filename via "" * syntax/php|vi: Correct strings in comments to comments only Additionally improve vimscript comment handling. * highlighter: Remove problematic start|end check in find(all)Index() ...and additionally remove recursive region end detection --- pkg/highlight/highlighter.go | 38 +++++++--------------------------- runtime/syntax/c.yaml | 10 ++++----- runtime/syntax/cpp.yaml | 4 ++-- runtime/syntax/javascript.yaml | 9 +++----- runtime/syntax/justfile.yaml | 2 +- runtime/syntax/php.yaml | 25 ++++++++++++---------- runtime/syntax/ruby.yaml | 2 +- runtime/syntax/sh.yaml | 12 +++++------ runtime/syntax/vi.yaml | 19 ++++++++--------- 9 files changed, 47 insertions(+), 74 deletions(-) diff --git a/pkg/highlight/highlighter.go b/pkg/highlight/highlighter.go index 50545f04..bee7e9e4 100644 --- a/pkg/highlight/highlighter.go +++ b/pkg/highlight/highlighter.go @@ -96,19 +96,7 @@ func NewHighlighter(def *Def) *Highlighter { // color's group (represented as one byte) type LineMatch map[int]Group -func findIndex(regex *regexp.Regexp, skip *regexp.Regexp, str []byte, canMatchStart, canMatchEnd bool) []int { - regexStr := regex.String() - if strings.Contains(regexStr, "^") { - if !canMatchStart { - return nil - } - } - if strings.Contains(regexStr, "$") { - if !canMatchEnd { - return nil - } - } - +func findIndex(regex *regexp.Regexp, skip *regexp.Regexp, str []byte) []int { var strbytes []byte if skip != nil { strbytes = skip.ReplaceAllFunc(str, func(match []byte) []byte { @@ -127,18 +115,7 @@ func findIndex(regex *regexp.Regexp, skip *regexp.Regexp, str []byte, canMatchSt return []int{runePos(match[0], str), runePos(match[1], str)} } -func findAllIndex(regex *regexp.Regexp, str []byte, canMatchStart, canMatchEnd bool) [][]int { - regexStr := regex.String() - if strings.Contains(regexStr, "^") { - if !canMatchStart { - return nil - } - } - if strings.Contains(regexStr, "$") { - if !canMatchEnd { - return nil - } - } +func findAllIndex(regex *regexp.Regexp, str []byte) [][]int { matches := regex.FindAllIndex(str, -1) for i, m := range matches { matches[i][0] = runePos(m[0], str) @@ -157,7 +134,7 @@ func (h *Highlighter) highlightRegion(highlights LineMatch, start int, canMatchE } } - loc := findIndex(curRegion.end, curRegion.skip, line, start == 0, canMatchEnd) + loc := findIndex(curRegion.end, curRegion.skip, line) if loc != nil { if !statesOnly { highlights[start+loc[0]] = curRegion.limitGroup @@ -165,7 +142,6 @@ func (h *Highlighter) highlightRegion(highlights LineMatch, start int, canMatchE if curRegion.parent == nil { if !statesOnly { highlights[start+loc[1]] = 0 - h.highlightRegion(highlights, start, false, lineNum, sliceEnd(line, loc[0]), curRegion, statesOnly) } h.highlightEmptyRegion(highlights, start+loc[1], canMatchEnd, lineNum, sliceStart(line, loc[1]), statesOnly) return highlights @@ -190,7 +166,7 @@ func (h *Highlighter) highlightRegion(highlights LineMatch, start int, canMatchE var firstRegion *region for _, r := range curRegion.rules.regions { - loc := findIndex(r.start, nil, line, start == 0, canMatchEnd) + loc := findIndex(r.start, r.skip, line) if loc != nil { if loc[0] < firstLoc[0] { firstLoc = loc @@ -214,7 +190,7 @@ func (h *Highlighter) highlightRegion(highlights LineMatch, start int, canMatchE } for _, p := range curRegion.rules.patterns { - matches := findAllIndex(p.regex, line, start == 0, canMatchEnd) + matches := findAllIndex(p.regex, line) for _, m := range matches { for i := m[0]; i < m[1]; i++ { fullHighlights[i] = p.group @@ -247,7 +223,7 @@ func (h *Highlighter) highlightEmptyRegion(highlights LineMatch, start int, canM firstLoc := []int{lineLen, 0} var firstRegion *region for _, r := range h.Def.rules.regions { - loc := findIndex(r.start, nil, line, start == 0, canMatchEnd) + loc := findIndex(r.start, r.skip, line) if loc != nil { if loc[0] < firstLoc[0] { firstLoc = loc @@ -274,7 +250,7 @@ func (h *Highlighter) highlightEmptyRegion(highlights LineMatch, start int, canM fullHighlights := make([]Group, len(line)) for _, p := range h.Def.rules.patterns { - matches := findAllIndex(p.regex, line, start == 0, canMatchEnd) + matches := findAllIndex(p.regex, line) for _, m := range matches { for i := m[0]; i < m[1]; i++ { fullHighlights[i] = p.group diff --git a/runtime/syntax/c.yaml b/runtime/syntax/c.yaml index 97e53d40..cf19b25a 100644 --- a/runtime/syntax/c.yaml +++ b/runtime/syntax/c.yaml @@ -5,11 +5,10 @@ detect: rules: - identifier: "\\b[A-Z_][0-9A-Z_]+\\b" - - type: "\\b(auto|float|double|char|int|short|long|sizeof|enum|void|static|const|struct|union|typedef|extern|(un)?signed|inline)\\b" + - type: "\\b(float|double|bool|char|int|short|long|enum|void|struct|union|typedef|(un)?signed|inline)\\b" - type: "\\b((s?size)|((u_?)?int(8|16|32|64|ptr)))_t\\b" - type: "\\b[a-z_][0-9a-z_]+(_t|_T)\\b" - - type.extended: "\\b(bool)\\b" - - statement: "\\b(volatile|register|restrict)\\b" + - statement: "\\b(auto|volatile|register|restrict|static|const|extern)\\b" - statement: "\\b(for|if|while|do|else|case|default|switch)\\b" - statement: "\\b(goto|continue|break|return)\\b" - preproc: "^[[:space:]]*#[[:space:]]*(define|pragma|include|(un|ifn?)def|endif|el(if|se)|if|warning|error)" @@ -17,7 +16,7 @@ rules: - statement: "__attribute__[[:space:]]*\\(\\([^)]*\\)\\)" - statement: "__(aligned|asm|builtin|hidden|inline|packed|restrict|section|typeof|weak)__" # Operator Color - - symbol.operator: "([.:;,+*|=!\\%]|<|>|/|-|&)" + - symbol.operator: "[-+*/%=<>.:;,~&|^!?]|\\b(sizeof)\\b" - symbol.brackets: "[(){}]|\\[|\\]" # Integer Constants - constant.number: "(\\b([1-9][0-9]*|0[0-7]*|0[Xx][0-9A-Fa-f]+|0[Bb][01]+)([Uu][Ll]?[Ll]?|[Ll][Ll]?[Uu]?)?\\b)" @@ -25,7 +24,7 @@ rules: - constant.number: "(\\b(([0-9]*[.][0-9]+|[0-9]+[.][0-9]*)([Ee][+-]?[0-9]+)?|[0-9]+[Ee][+-]?[0-9]+)[FfLl]?\\b)" # Hexadecimal Floating Constants - constant.number: "(\\b0[Xx]([0-9A-Za-z]*[.][0-9A-Za-z]+|[0-9A-Za-z]+[.][0-9A-Za-z]*)[Pp][+-]?[0-9]+[FfLl]?\\b)" - - constant.number: "NULL" + - constant.bool: "(\\b(true|false|NULL|nullptr|TRUE|FALSE)\\b)" - constant.string: start: "\"" @@ -53,3 +52,4 @@ rules: end: "\\*/" rules: - todo: "(TODO|XXX|FIXME):?" + diff --git a/runtime/syntax/cpp.yaml b/runtime/syntax/cpp.yaml index 0da1da6f..3c97b2c4 100644 --- a/runtime/syntax/cpp.yaml +++ b/runtime/syntax/cpp.yaml @@ -9,7 +9,7 @@ rules: - type: "\\b(((s?size)|((u_?)?int(8|16|32|64|ptr))|char(8|16|32))_t|wchar_t)\\b" - type: "\\b[a-z_][0-9a-z_]+(_t|_T)\\b" - type: "\\b(final|override)\\b" - - type.keyword: "\\b(auto|volatile|const(expr|eval|init)?|mutable|register|thread_local|static|extern|decltype|explicit|virtual)\\b" + - statement: "\\b(auto|volatile|const(expr|eval|init)?|mutable|register|thread_local|static|extern|decltype|explicit|virtual)\\b" - statement: "\\b(class|namespace|template|typename|this|friend|using|public|protected|private|noexcept)\\b" - statement: "\\b(concept|requires)\\b" - statement: "\\b(import|export|module)\\b" @@ -34,7 +34,7 @@ rules: - constant.number: "(\\b(([0-9']*[.][0-9']+|[0-9']+[.][0-9']*)([Ee][+-]?[0-9']+)?|[0-9']+[Ee][+-]?[0-9']+)[FfLl]?\\b)" # Hexadecimal Floating-point Literals - constant.number: "(\\b0[Xx]([0-9a-zA-Z']*[.][0-9a-zA-Z']+|[0-9a-zA-Z']+[.][0-9a-zA-Z']*)[Pp][+-]?[0-9']+[FfLl]?\\b)" - - constant.bool: "(\\b(true|false|NULL|nullptr)\\b)" + - constant.bool: "(\\b(true|false|NULL|nullptr|TRUE|FALSE)\\b)" - constant.string: start: "\"" diff --git a/runtime/syntax/javascript.yaml b/runtime/syntax/javascript.yaml index 4006931b..b2bfe487 100644 --- a/runtime/syntax/javascript.yaml +++ b/runtime/syntax/javascript.yaml @@ -10,12 +10,7 @@ rules: - constant.number: "\\b[-+]?([0-9]+[EePp][+-]?[0-9]+)[fFlL]?" #- identifier: "[A-Za-z_][A-Za-z0-9_]*[[:space:]]*[(]" # ^ this is not correct usage of the identifier color - - symbol.brackets: "(\\{|\\})" - - symbol.brackets: "(\\(|\\))" - - symbol.brackets: "(\\[|\\])" - - symbol.brackets: "(\\{|\\})" - - symbol.brackets: "(\\(|\\))" - - symbol.brackets: "(\\[|\\])" + - symbol.brackets: "[(){}]|\\[|\\]" - symbol.operator: "([-+/*=<>!~%?:&|]|[.]{3})" - statement: "\\b(async|await|break|case|catch|const|continue|debugger|default)\\b" - statement: "\\b(delete|do|else|export|finally|for|function\\*?|class|extends)\\b" @@ -73,7 +68,9 @@ rules: - comment: start: "/\\*" end: "\\*/" + skip: "\\\\." rules: + - constant.specialChar: "\\\\." # function documentation - identifier: "\\s\\*\\s.*" - todo: "(TODO|XXX|FIXME)" diff --git a/runtime/syntax/justfile.yaml b/runtime/syntax/justfile.yaml index 4df61d57..926edb21 100644 --- a/runtime/syntax/justfile.yaml +++ b/runtime/syntax/justfile.yaml @@ -2,7 +2,7 @@ filetype: 'justfile' detect: - filename: '(^\\.?[Jj]ustfile|\\.just)$' + filename: "(^\\.?[Jj]ustfile|\\.just)$" header: "^#!.*/(env +)?[bg]?just --justfile" rules: diff --git a/runtime/syntax/php.yaml b/runtime/syntax/php.yaml index 7488be3c..8e57b8ca 100644 --- a/runtime/syntax/php.yaml +++ b/runtime/syntax/php.yaml @@ -9,6 +9,9 @@ rules: - symbol.tag: "(?i)<[/]?(a(bbr|cronym|ddress|pplet|rea|rticle|side|udio)?|b(ase(font)?|d(i|o)|ig|lockquote|r)?|ca(nvas|ption)|center|cite|co(de|l|lgroup)|d(ata(list)?|d|el|etails|fn|ialog|ir|l|t)|em(bed)?|fieldset|fig(caption|ure)|font|form|(i)?frame|frameset|h[1-6]|hr|i|img|in(put|s)|kbd|keygen|label|legend|li(nk)?|ma(in|p|rk)|menu(item)?|met(a|er)|nav|no(frames|script)|o(l|pt(group|ion)|utput)|p(aram|icture|re|rogress)?|q|r(p|t|uby)|s(trike)?|samp|se(ction|lect)|small|source|span|strong|su(b|p|mmary)|textarea|time|track|u(l)?|var|video|wbr)( .*|>)*?>" - symbol.tag.extended: "(?i)<[/]?(body|div|html|head(er)?|footer|title|table|t(body|d|h(ead)?|r|foot))( .*|>)*?>" - preproc: "(?i)<[/]?(script|style)( .*|>)*?>" + - preproc: "<\\?(php|=)?" + - preproc: "\\?>" + - preproc: "" - special: "&[^;[[:space:]]]*;" - symbol: "[:=]" - identifier: "(alt|bgcolor|height|href|label|longdesc|name|onclick|onfocus|onload|onmouseover|size|span|src|style|target|type|value|width)=" @@ -32,6 +35,17 @@ rules: - symbol.operator: "(=>|===|!==|==|!=|&&|\\|\\||::|=|->|\\!)" - identifier.var: "(\\$[a-zA-Z0-9\\-_]+)" - symbol.operator: "[\\(|\\)|/|+|\\-|\\*|\\[|.|,|;]" + - symbol.brackets: "(\\[|\\]|\\{|\\}|[()])" + + - comment: + start: "(^|[[:space:]])*(//|#)" + end: "$" + rules: [] + - comment: + start: "/\\*" + end: "\\*/" + rules: [] + - constant.string: start: "\"" end: "\"" @@ -44,14 +58,3 @@ rules: skip: "\\\\." rules: - constant.specialChar: "\\\\[abfnrtv'\\\"\\\\]" - - symbol.brackets: "(\\[|\\]|\\{|\\}|[()])" - - comment: "(^|[[:space:]])//.*" - - comment: "(^|[[:space:]])#.*" - - comment: - start: "/\\*" - end: "\\*/" - rules: [] - - - preproc: "<\\?(php|=)?" - - preproc: "\\?>" - - preproc: "" diff --git a/runtime/syntax/ruby.yaml b/runtime/syntax/ruby.yaml index f1f9e2a2..f04593ce 100644 --- a/runtime/syntax/ruby.yaml +++ b/runtime/syntax/ruby.yaml @@ -1,7 +1,7 @@ filetype: ruby detect: - filename: "\\.(rb|rake|gemspec)$|^(Gemfile|config.ru|Rakefile|Capfile|Vagrantfile|Guardfile|Appfile|Fastfile|Pluginfile|Podfile|\\.?[Bb]rewfile)$" + filename: "\\.(rb|rake|gemspec)$|^(.*[\\/])?(Gemfile|config.ru|Rakefile|Capfile|Vagrantfile|Guardfile|Appfile|Fastfile|Pluginfile|Podfile|\\.?[Bb]rewfile)$" header: "^#!.*/(env +)?ruby( |$)" rules: diff --git a/runtime/syntax/sh.yaml b/runtime/syntax/sh.yaml index 329b1014..038d95a5 100644 --- a/runtime/syntax/sh.yaml +++ b/runtime/syntax/sh.yaml @@ -23,7 +23,7 @@ filetype: shell # Fix command (fc) files: # * bash-fc. (followed by a random string) detect: - filename: '(\.(sh|bash|ash|ebuild)$|(\.bash(rc|_aliases|_functions|_profile)|\.?profile|Pkgfile|pkgmk\.conf|rc\.conf|PKGBUILD|APKBUILD)$|bash-fc\.)' + filename: "(\\.(sh|bash|ash|ebuild)$|(\\.bash(rc|_aliases|_functions|_profile)|\\.?profile|Pkgfile|pkgmk\\.conf|rc\\.conf|PKGBUILD|APKBUILD)$|bash-fc\\.)" header: "^#!.*/(env +)?(ba)?(a)?(mk)?sh( |$)" rules: @@ -31,7 +31,7 @@ rules: - constant.number: "\\b[0-9]+\\b" # Conditionals and control flow - statement: "\\b(case|do|done|elif|else|esac|exit|fi|for|function|if|in|local|read|return|select|shift|then|time|until|while)\\b" - - special: "(\\{|\\}|\\(|\\)|\\;|\\]|\\[|`|\\\\|\\$|<|>|!|=|&|\\|)" + - special: "[`$<>!=&~^\\{\\}\\(\\)\\;\\]\\[]+" # Shell commands - type: "\\b(cd|echo|export|let|set|umask|unset)\\b" # Common linux commands @@ -39,11 +39,10 @@ rules: # Coreutils commands - type: "\\b(base64|basename|cat|chcon|chgrp|chmod|chown|chroot|cksum|comm|cp|csplit|cut|date|dd|df|dir|dircolors|dirname|du|env|expand|expr|factor|false|fmt|fold|head|hostid|id|install|join|link|ln|logname|ls|md5sum|mkdir|mkfifo|mknod|mktemp|mv|nice|nl|nohup|nproc|numfmt|od|paste|pathchk|pinky|pr|printenv|printf|ptx|pwd|readlink|realpath|rm|rmdir|runcon|seq|(sha1|sha224|sha256|sha384|sha512)sum|shred|shuf|sleep|sort|split|stat|stdbuf|stty|sum|sync|tac|tail|tee|test|time|timeout|touch|tr|true|truncate|tsort|tty|uname|unexpand|uniq|unlink|users|vdir|wc|who|whoami|yes)\\b" # Conditional flags - - statement: "--[a-z-]+" - - statement: "\\ -[a-z]+" + - statement: " (-[A-Za-z]+|--[a-z]+)" - - identifier: "\\$\\{?[0-9A-Za-z_!@#$*?-]+\\}?" - - identifier: "\\$\\{?[0-9A-Za-z_!@#$*?-]+\\}?" + - identifier: "\\$\\{[0-9A-Za-z_:!%&=+#~@*^$?, .\\-\\/\\[\\]]+\\}" + - identifier: "\\$[0-9A-Za-z_:!%&=+#~@*^$?,\\-\\/\\[\\]]+" - constant.string: start: "\"" @@ -62,4 +61,3 @@ rules: end: "$" rules: - todo: "(TODO|XXX|FIXME):?" - diff --git a/runtime/syntax/vi.yaml b/runtime/syntax/vi.yaml index d83a80af..2a828dcd 100644 --- a/runtime/syntax/vi.yaml +++ b/runtime/syntax/vi.yaml @@ -4,12 +4,18 @@ detect: filename: "(^|/|\\.)(ex|vim)rc$|\\.vim" rules: - - identifier: "[A-Za-z_][A-Za-z0-9_]*[[:space:]]*[()]" + - identifier: "[A-Za-z_][A-Za-z0-9_]*[(]+[A-Za-z0-9_:.,\\s]*[)]+" + - special: "[()]+" - statement: "\\b([nvxsoilc]?(nore|un)?map|[nvlx]n|[ico]?no|[cilovx][um]|s?unm)\\b" - - statement: "\\b(snor|nun|nm|set|if|endif|let|unlet)\\b" - - statement: "[!&=]" + - statement: "\\b(snor|nun|nm|set|if|endif|let|unlet|source)\\b" + - statement: "[!&=?]" - constant.number: "\\b[0-9]+\\b" + - comment: + start: "(^\"|[ \t]+\" |[ \t]+\"$)" + end: "$" + rules: [] + - constant.string: start: "\"" end: "\"" @@ -23,10 +29,3 @@ rules: skip: "\\\\." rules: - constant.specialChar: "\\\\." - - - comment: - start: "\"" - end: "$" - rules: [] - -