From 433879046e18c79a6780080b233b04154582cbf7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6ran=20Karl?= <3951388+JoeKar@users.noreply.github.com> Date: Thu, 4 May 2023 23:48:42 +0200 Subject: [PATCH 1/5] Improve file detection with signature check capabilities This allows more complex detection upon regex rules for a certain amount of lines. --- internal/buffer/buffer.go | 54 ++++++++++++++++++++++++---- pkg/highlight/ftdetect.go | 18 ---------- pkg/highlight/parser.go | 56 +++++++++++++++++++++--------- runtime/help/colors.md | 6 ++-- runtime/syntax/PowerShell.yaml | 2 +- runtime/syntax/README.md | 2 +- runtime/syntax/awk.yaml | 2 +- runtime/syntax/bat.yaml | 2 +- runtime/syntax/crontab.yaml | 2 +- runtime/syntax/csx.yaml | 2 +- runtime/syntax/fish.yaml | 2 +- runtime/syntax/godoc.yaml | 2 +- runtime/syntax/groovy.yaml | 2 +- runtime/syntax/html4.yaml | 2 +- runtime/syntax/html5.yaml | 2 +- runtime/syntax/javascript.yaml | 2 +- runtime/syntax/json.yaml | 2 +- runtime/syntax/julia.yaml | 2 +- runtime/syntax/justfile.yaml | 2 +- runtime/syntax/mail.yaml | 2 +- runtime/syntax/make_headers.go | 17 ++++----- runtime/syntax/makefile.yaml | 2 +- runtime/syntax/nginx.yaml | 2 +- runtime/syntax/patch.yaml | 2 +- runtime/syntax/perl.yaml | 2 +- runtime/syntax/python2.yaml | 2 +- runtime/syntax/python3.yaml | 2 +- runtime/syntax/ruby.yaml | 2 +- runtime/syntax/sage.yaml | 2 +- runtime/syntax/sed.yaml | 2 +- runtime/syntax/sh.yaml | 2 +- runtime/syntax/syntax_converter.go | 2 +- runtime/syntax/systemd.yaml | 2 +- runtime/syntax/tcl.yaml | 2 +- runtime/syntax/xml.yaml | 2 +- runtime/syntax/yaml.yaml | 2 +- runtime/syntax/zsh.yaml | 2 +- 37 files changed, 130 insertions(+), 85 deletions(-) delete mode 100644 pkg/highlight/ftdetect.go diff --git a/internal/buffer/buffer.go b/internal/buffer/buffer.go index dc4d037f..3e4fc766 100644 --- a/internal/buffer/buffer.go +++ b/internal/buffer/buffer.go @@ -685,6 +685,16 @@ func (b *Buffer) UpdateRules() { if ft == "off" { return } + 
+ // syntaxFileBuffer is a helper structure + // to store properties of one single syntax file + type syntaxFileBuffer struct { + header *highlight.Header + fileName string + syntaxDef *highlight.Def + } + + syntaxFiles := []syntaxFileBuffer{} syntaxFile := "" foundDef := false var header *highlight.Header @@ -707,16 +717,21 @@ func (b *Buffer) UpdateRules() { continue } - if ((ft == "unknown" || ft == "") && highlight.MatchFiletype(header.FtDetect, b.Path, b.lines[0].data)) || header.FileType == ft { + if ((ft == "unknown" || ft == "") && header.MatchFileName(b.Path)) || header.FileType == ft { syndef, err := highlight.ParseDef(file, header) if err != nil { screen.TermMessage("Error parsing syntax file " + f.Name() + ": " + err.Error()) continue } - b.SyntaxDef = syndef - syntaxFile = f.Name() foundDef = true - break + + if header.FileType == ft { + b.SyntaxDef = syndef + syntaxFile = f.Name() + break + } else { + syntaxFiles = append(syntaxFiles, syntaxFileBuffer{header, f.Name(), syndef}) + } } } @@ -735,9 +750,8 @@ func (b *Buffer) UpdateRules() { } if ft == "unknown" || ft == "" { - if highlight.MatchFiletype(header.FtDetect, b.Path, b.lines[0].data) { - syntaxFile = f.Name() - break + if header.MatchFileName(b.Path) { + syntaxFiles = append(syntaxFiles, syntaxFileBuffer{header, f.Name(), nil}) } } else if header.FileType == ft { syntaxFile = f.Name() @@ -745,6 +759,32 @@ func (b *Buffer) UpdateRules() { } } + if syntaxFile == "" { + length := len(syntaxFiles) + if length > 0 { + signatureMatch := false + if length > 1 { + for i := 0; i < length && !signatureMatch; i++ { + if syntaxFiles[i].header.HasFileSignature() { + for j := 0; j < 100 && !signatureMatch; j++ { + if syntaxFiles[i].header.MatchFileSignature(b.lines[j].data) { + syntaxFile = syntaxFiles[i].fileName + b.SyntaxDef = syntaxFiles[i].syntaxDef + header = syntaxFiles[i].header + signatureMatch = true + } + } + } + } + } + if length == 1 || !signatureMatch { + syntaxFile = syntaxFiles[0].fileName + 
b.SyntaxDef = syntaxFiles[0].syntaxDef + header = syntaxFiles[0].header + } + } + } + if syntaxFile != "" && !foundDef { // we found a syntax file using a syntax header file for _, f := range config.ListRuntimeFiles(config.RTSyntax) { diff --git a/pkg/highlight/ftdetect.go b/pkg/highlight/ftdetect.go deleted file mode 100644 index 580ade8f..00000000 --- a/pkg/highlight/ftdetect.go +++ /dev/null @@ -1,18 +0,0 @@ -package highlight - -import "regexp" - -// MatchFiletype will use the list of syntax definitions provided and the filename and first line of the file -// to determine the filetype of the file -// It will return the corresponding syntax definition for the filetype -func MatchFiletype(ftdetect [2]*regexp.Regexp, filename string, firstLine []byte) bool { - if ftdetect[0] != nil && ftdetect[0].MatchString(filename) { - return true - } - - if ftdetect[1] != nil { - return ftdetect[1].Match(firstLine) - } - - return false -} diff --git a/pkg/highlight/parser.go b/pkg/highlight/parser.go index f46d8259..92e290fe 100644 --- a/pkg/highlight/parser.go +++ b/pkg/highlight/parser.go @@ -33,27 +33,26 @@ func (g Group) String() string { // Then it has the rules which define how to highlight the file type Def struct { *Header - rules *rules } type Header struct { - FileType string - FtDetect [2]*regexp.Regexp + FileType string + FileNameRegex *regexp.Regexp + SignatureRegex *regexp.Regexp } type HeaderYaml struct { FileType string `yaml:"filetype"` Detect struct { - FNameRgx string `yaml:"filename"` - HeaderRgx string `yaml:"header"` + FNameRegexStr string `yaml:"filename"` + SignatureRegexStr string `yaml:"signature"` } `yaml:"detect"` } type File struct { FileType string - - yamlSrc map[interface{}]interface{} + yamlSrc map[interface{}]interface{} } // A Pattern is one simple syntax rule @@ -103,14 +102,14 @@ func MakeHeader(data []byte) (*Header, error) { header := new(Header) var err error header.FileType = string(lines[0]) - fnameRgx := string(lines[1]) - headerRgx 
:= string(lines[2]) + fnameRegexStr := string(lines[1]) + signatureRegexStr := string(lines[2]) - if fnameRgx != "" { - header.FtDetect[0], err = regexp.Compile(fnameRgx) + if fnameRegexStr != "" { + header.FileNameRegex, err = regexp.Compile(fnameRegexStr) } - if err == nil && headerRgx != "" { - header.FtDetect[1], err = regexp.Compile(headerRgx) + if err == nil && signatureRegexStr != "" { + header.SignatureRegex, err = regexp.Compile(signatureRegexStr) } if err != nil { @@ -132,11 +131,11 @@ func MakeHeaderYaml(data []byte) (*Header, error) { header := new(Header) header.FileType = hdrYaml.FileType - if hdrYaml.Detect.FNameRgx != "" { - header.FtDetect[0], err = regexp.Compile(hdrYaml.Detect.FNameRgx) + if hdrYaml.Detect.FNameRegexStr != "" { + header.FileNameRegex, err = regexp.Compile(hdrYaml.Detect.FNameRegexStr) } - if err == nil && hdrYaml.Detect.HeaderRgx != "" { - header.FtDetect[1], err = regexp.Compile(hdrYaml.Detect.HeaderRgx) + if err == nil && hdrYaml.Detect.SignatureRegexStr != "" { + header.SignatureRegex, err = regexp.Compile(hdrYaml.Detect.SignatureRegexStr) } if err != nil { @@ -146,6 +145,29 @@ func MakeHeaderYaml(data []byte) (*Header, error) { return header, nil } +// MatchFileName will check the given file name with the stored regex +func (header *Header) MatchFileName(filename string) bool { + if header.FileNameRegex != nil { + return header.FileNameRegex.MatchString(filename) + } + + return false +} + +// HasFileSignature checks the presence of a stored signature +func (header *Header) HasFileSignature() bool { + return header.SignatureRegex != nil +} + +// MatchFileSignature will check the given line with the stored regex +func (header *Header) MatchFileSignature(line []byte) bool { + if header.SignatureRegex != nil { + return header.SignatureRegex.Match(line) + } + + return false +} + func ParseFile(input []byte) (f *File, err error) { // This is just so if we have an error, we can exit cleanly and return the parse error to the user 
defer func() { diff --git a/runtime/help/colors.md b/runtime/help/colors.md index ac8b4be3..4a3ee7e0 100644 --- a/runtime/help/colors.md +++ b/runtime/help/colors.md @@ -267,13 +267,13 @@ detect: ``` Micro will match this regex against a given filename to detect the filetype. -You may also provide an optional `header` regex that will check the first line -of the file. For example: +You may also provide an optional `signature` regex that is checked against a +limited number of lines of the file to find specific marks. For example: ``` detect: filename: "\\.ya?ml$" - header: "%YAML" + signature: "%YAML" ``` ### Syntax rules diff --git a/runtime/syntax/PowerShell.yaml b/runtime/syntax/PowerShell.yaml index 7a45e426..daaa2b21 100644 --- a/runtime/syntax/PowerShell.yaml +++ b/runtime/syntax/PowerShell.yaml @@ -5,7 +5,7 @@ filetype: powershell detect: filename: "\\.ps(1|m1|d1)$" - #header: "" + #signature: "" rules: # - comment.block: # Block Comment diff --git a/runtime/syntax/README.md b/runtime/syntax/README.md index 7d1a9de4..e97a4f77 100644 --- a/runtime/syntax/README.md +++ b/runtime/syntax/README.md @@ -2,7 +2,7 @@ Here are micro's syntax files. -Each yaml file specifies how to detect the filetype based on file extension or headers (first line of the file). +Each yaml file specifies how to detect the filetype based on file extension or given signature. The signature can be matched to a maximum of 100 lines (to limit parse times) for a best "guess". Then there are patterns and regions linked to highlight groups which tell micro how to highlight that filetype. Making your own syntax files is very simple. 
I recommend you check the file after you are finished with the diff --git a/runtime/syntax/awk.yaml b/runtime/syntax/awk.yaml index ff3f6988..93ddf9ae 100644 --- a/runtime/syntax/awk.yaml +++ b/runtime/syntax/awk.yaml @@ -2,7 +2,7 @@ filetype: awk detect: filename: "\\.awk$" - header: "^#!.*bin/(env +)?awk( |$)" + signature: "^#!.*bin/(env +)?awk( |$)" rules: - preproc: "\\$[A-Za-z0-9_!@#$*?\\-]+" diff --git a/runtime/syntax/bat.yaml b/runtime/syntax/bat.yaml index 2ef8d987..741f7437 100644 --- a/runtime/syntax/bat.yaml +++ b/runtime/syntax/bat.yaml @@ -2,7 +2,7 @@ filetype: batch detect: filename: "(\\.bat$|\\.cmd$)" - # header: "" + # signature: "" rules: # Numbers diff --git a/runtime/syntax/crontab.yaml b/runtime/syntax/crontab.yaml index cebc7cad..aec2e78f 100644 --- a/runtime/syntax/crontab.yaml +++ b/runtime/syntax/crontab.yaml @@ -2,7 +2,7 @@ filetype: crontab detect: filename: "crontab$" - header: "^#.*?/etc/crontab" + signature: "^#.*?/etc/crontab" rules: # The time and date fields are: diff --git a/runtime/syntax/csx.yaml b/runtime/syntax/csx.yaml index a3a13a6c..3710af17 100644 --- a/runtime/syntax/csx.yaml +++ b/runtime/syntax/csx.yaml @@ -1,7 +1,7 @@ filetype: csharp-script detect: filename: "\\.csx$" - header: "^#!.*/(env +)?dotnet-script( |$)" + signature: "^#!.*/(env +)?dotnet-script( |$)" rules: - include: "csharp" diff --git a/runtime/syntax/fish.yaml b/runtime/syntax/fish.yaml index 88798a04..e5078097 100644 --- a/runtime/syntax/fish.yaml +++ b/runtime/syntax/fish.yaml @@ -2,7 +2,7 @@ filetype: fish detect: filename: "\\.fish$" - header: "^#!.*/(env +)?fish( |$)" + signature: "^#!.*/(env +)?fish( |$)" rules: # Numbers diff --git a/runtime/syntax/godoc.yaml b/runtime/syntax/godoc.yaml index b7726296..4aa1bc45 100644 --- a/runtime/syntax/godoc.yaml +++ b/runtime/syntax/godoc.yaml @@ -5,7 +5,7 @@ filetype: godoc detect: filename: "\\.godoc$" - header: package.*import + signature: package.*import rules: - preproc: "^[^ ].*" diff --git 
a/runtime/syntax/groovy.yaml b/runtime/syntax/groovy.yaml index 3aa0e283..a19cdcd3 100644 --- a/runtime/syntax/groovy.yaml +++ b/runtime/syntax/groovy.yaml @@ -2,7 +2,7 @@ filetype: groovy detect: filename: "(\\.(groovy|gy|gvy|gsh|gradle)$|^[Jj]enkinsfile$)" - header: "^#!.*/(env +)?groovy *$" + signature: "^#!.*/(env +)?groovy *$" rules: # And the style guide for constants is CONSTANT_CASE diff --git a/runtime/syntax/html4.yaml b/runtime/syntax/html4.yaml index c132d61e..a7cfae3f 100644 --- a/runtime/syntax/html4.yaml +++ b/runtime/syntax/html4.yaml @@ -2,7 +2,7 @@ filetype: html4 detect: filename: "\\.htm[l]?4$" - header: "" + signature: "" rules: - error: "<[^!].*?>" diff --git a/runtime/syntax/html5.yaml b/runtime/syntax/html5.yaml index 411d5385..97bffde2 100644 --- a/runtime/syntax/html5.yaml +++ b/runtime/syntax/html5.yaml @@ -2,7 +2,7 @@ filetype: html5 detect: filename: "\\.htm[l]?5$" - header: "" + signature: "" rules: - error: "<[^!].*?>" diff --git a/runtime/syntax/javascript.yaml b/runtime/syntax/javascript.yaml index b2bfe487..0b42caa6 100644 --- a/runtime/syntax/javascript.yaml +++ b/runtime/syntax/javascript.yaml @@ -2,7 +2,7 @@ filetype: javascript detect: filename: "(\\.js$|\\.es[5678]?$|\\.mjs$)" - header: "^#!.*/(env +)?node( |$)" + signature: "^#!.*/(env +)?node( |$)" rules: - constant.number: "\\b[-+]?([1-9][0-9]*|0[0-7]*|0x[0-9a-fA-F]+)([uU][lL]?|[lL][uU]?)?\\b" diff --git a/runtime/syntax/json.yaml b/runtime/syntax/json.yaml index c590bd38..35b483e9 100644 --- a/runtime/syntax/json.yaml +++ b/runtime/syntax/json.yaml @@ -2,7 +2,7 @@ filetype: json detect: filename: "\\.json$" - header: "^\\{$" + signature: "^\\{$" rules: - constant.number: "\\b[-+]?([1-9][0-9]*|0[0-7]*|0x[0-9a-fA-F]+)([uU][lL]?|[lL][uU]?)?\\b" diff --git a/runtime/syntax/julia.yaml b/runtime/syntax/julia.yaml index c96ef0f3..8a46e5cf 100644 --- a/runtime/syntax/julia.yaml +++ b/runtime/syntax/julia.yaml @@ -2,7 +2,7 @@ filetype: julia detect: filename: "\\.jl$" - header: 
"^#!.*/(env +)?julia( |$)" + signature: "^#!.*/(env +)?julia( |$)" rules: diff --git a/runtime/syntax/justfile.yaml b/runtime/syntax/justfile.yaml index 926edb21..2a856edb 100644 --- a/runtime/syntax/justfile.yaml +++ b/runtime/syntax/justfile.yaml @@ -3,7 +3,7 @@ filetype: 'justfile' detect: filename: "(^\\.?[Jj]ustfile|\\.just)$" - header: "^#!.*/(env +)?[bg]?just --justfile" + signature: "^#!.*/(env +)?[bg]?just --justfile" rules: - preproc: "\\<(ifeq|ifdef|ifneq|ifndef|else|endif)\\>" diff --git a/runtime/syntax/mail.yaml b/runtime/syntax/mail.yaml index 57aa0344..a400b28b 100644 --- a/runtime/syntax/mail.yaml +++ b/runtime/syntax/mail.yaml @@ -2,7 +2,7 @@ filetype: mail detect: filename: "(.*/mutt-.*|\\.eml)$" - header: "^From .* \\d+:\\d+:\\d+ \\d+" + signature: "^From .* \\d+:\\d+:\\d+ \\d+" rules: - type: "^From .*" diff --git a/runtime/syntax/make_headers.go b/runtime/syntax/make_headers.go index e04d8062..c80c680e 100644 --- a/runtime/syntax/make_headers.go +++ b/runtime/syntax/make_headers.go @@ -1,4 +1,5 @@ -//+build ignore +//go:build ignore +// +build ignore package main @@ -16,15 +17,15 @@ import ( type HeaderYaml struct { FileType string `yaml:"filetype"` Detect struct { - FNameRgx string `yaml:"filename"` - HeaderRgx string `yaml:"header"` + FNameRgx string `yaml:"filename"` + SignatureRgx string `yaml:"signature"` } `yaml:"detect"` } type Header struct { - FileType string - FNameRgx string - HeaderRgx string + FileType string + FNameRgx string + SignatureRgx string } func main() { @@ -58,7 +59,7 @@ func encode(name string, c HeaderYaml) { f, _ := os.Create(name + ".hdr") f.WriteString(c.FileType + "\n") f.WriteString(c.Detect.FNameRgx + "\n") - f.WriteString(c.Detect.HeaderRgx + "\n") + f.WriteString(c.Detect.SignatureRgx + "\n") f.Close() } @@ -69,7 +70,7 @@ func decode(name string) Header { var hdr Header hdr.FileType = string(strs[0]) hdr.FNameRgx = string(strs[1]) - hdr.HeaderRgx = string(strs[2]) + hdr.SignatureRgx = string(strs[2]) 
fmt.Printf("took %v\n", time.Since(start)) return hdr diff --git a/runtime/syntax/makefile.yaml b/runtime/syntax/makefile.yaml index 7e90cdeb..670935fa 100644 --- a/runtime/syntax/makefile.yaml +++ b/runtime/syntax/makefile.yaml @@ -2,7 +2,7 @@ filetype: makefile detect: filename: "([Mm]akefile|\\.ma?k)$" - header: "^#!.*/(env +)?[bg]?make( |$)" + signature: "^#!.*/(env +)?[bg]?make( |$)" rules: - preproc: "\\<(ifeq|ifdef|ifneq|ifndef|else|endif)\\>" diff --git a/runtime/syntax/nginx.yaml b/runtime/syntax/nginx.yaml index c2223b5a..c35e213e 100644 --- a/runtime/syntax/nginx.yaml +++ b/runtime/syntax/nginx.yaml @@ -2,7 +2,7 @@ filetype: nginx detect: filename: "nginx.*\\.conf$|\\.nginx$" - header: "^(server|upstream)[a-z ]*\\{$" + signature: "^(server|upstream)[a-z ]*\\{$" rules: - preproc: "\\b(events|server|http|location|upstream)[[:space:]]*\\{" diff --git a/runtime/syntax/patch.yaml b/runtime/syntax/patch.yaml index 996bdc38..6275d423 100644 --- a/runtime/syntax/patch.yaml +++ b/runtime/syntax/patch.yaml @@ -2,7 +2,7 @@ filetype: patch detect: filename: "\\.(patch|diff)$" - header: "^diff" + signature: "^diff" rules: - brightgreen: "^\\+.*" diff --git a/runtime/syntax/perl.yaml b/runtime/syntax/perl.yaml index 984cc24a..5a8ad3cd 100644 --- a/runtime/syntax/perl.yaml +++ b/runtime/syntax/perl.yaml @@ -2,7 +2,7 @@ filetype: perl detect: filename: "\\.p[lmp]$" - header: "^#!.*/(env +)?perl( |$)" + signature: "^#!.*/(env +)?perl( |$)" rules: - type: 
"\\b(accept|alarm|atan2|bin(d|mode)|c(aller|homp|h(dir|mod|op|own|root)|lose(dir)?|onnect|os|rypt)|d(bm(close|open)|efined|elete|ie|o|ump)|e(ach|of|val|x(ec|ists|it|p))|f(cntl|ileno|lock|ork))\\b|\\b(get(c|login|peername|pgrp|ppid|priority|pwnam|(host|net|proto|serv)byname|pwuid|grgid|(host|net)byaddr|protobynumber|servbyport)|([gs]et|end)(pw|gr|host|net|proto|serv)ent|getsock(name|opt)|gmtime|goto|grep|hex|index|int|ioctl|join)\\b|\\b(keys|kill|last|length|link|listen|local(time)?|log|lstat|m|mkdir|msg(ctl|get|snd|rcv)|next|oct|open(dir)?|ord|pack|pipe|pop|printf?|push|q|qq|qx|rand|re(ad(dir|link)?|cv|say|do|name|quire|set|turn|verse|winddir)|rindex|rmdir|s|scalar|seek(dir)?)\\b|\\b(se(lect|mctl|mget|mop|nd|tpgrp|tpriority|tsockopt)|shift|shm(ctl|get|read|write)|shutdown|sin|sleep|socket(pair)?|sort|spli(ce|t)|sprintf|sqrt|srand|stat|study|substr|symlink|sys(call|read|tem|write)|tell(dir)?|time|tr(y)?|truncate|umask)\\b|\\b(un(def|link|pack|shift)|utime|values|vec|wait(pid)?|wantarray|warn|write)\\b" diff --git a/runtime/syntax/python2.yaml b/runtime/syntax/python2.yaml index 3a993b05..42f7ffb4 100644 --- a/runtime/syntax/python2.yaml +++ b/runtime/syntax/python2.yaml @@ -2,7 +2,7 @@ filetype: python2 detect: filename: "\\.py2$" - header: "^#!.*/(env +)?python2$" + signature: "^#!.*/(env +)?python2$" rules: diff --git a/runtime/syntax/python3.yaml b/runtime/syntax/python3.yaml index 5a060bff..7e18df6e 100644 --- a/runtime/syntax/python3.yaml +++ b/runtime/syntax/python3.yaml @@ -2,7 +2,7 @@ filetype: python detect: filename: "\\.py(3)?$" - header: "^#!.*/(env +)?python(3)?$" + signature: "^#!.*/(env +)?python(3)?$" rules: # built-in objects diff --git a/runtime/syntax/ruby.yaml b/runtime/syntax/ruby.yaml index f04593ce..4a432d3b 100644 --- a/runtime/syntax/ruby.yaml +++ b/runtime/syntax/ruby.yaml @@ -2,7 +2,7 @@ filetype: ruby detect: filename: 
"\\.(rb|rake|gemspec)$|^(.*[\\/])?(Gemfile|config.ru|Rakefile|Capfile|Vagrantfile|Guardfile|Appfile|Fastfile|Pluginfile|Podfile|\\.?[Bb]rewfile)$" - header: "^#!.*/(env +)?ruby( |$)" + signature: "^#!.*/(env +)?ruby( |$)" rules: - comment.bright: diff --git a/runtime/syntax/sage.yaml b/runtime/syntax/sage.yaml index e24fbeb6..8d2cb07a 100644 --- a/runtime/syntax/sage.yaml +++ b/runtime/syntax/sage.yaml @@ -2,7 +2,7 @@ filetype: sage detect: filename: "\\.sage$" - header: "^#!.*/(env +)?sage( |$)" + signature: "^#!.*/(env +)?sage( |$)" rules: diff --git a/runtime/syntax/sed.yaml b/runtime/syntax/sed.yaml index dc5f7adc..1b297c44 100644 --- a/runtime/syntax/sed.yaml +++ b/runtime/syntax/sed.yaml @@ -2,7 +2,7 @@ filetype: sed detect: filename: "\\.sed$" - header: "^#!.*bin/(env +)?sed( |$)" + signature: "^#!.*bin/(env +)?sed( |$)" rules: - symbol.operator: "[|^$.*+]" diff --git a/runtime/syntax/sh.yaml b/runtime/syntax/sh.yaml index ab47bf1d..6c122de2 100644 --- a/runtime/syntax/sh.yaml +++ b/runtime/syntax/sh.yaml @@ -24,7 +24,7 @@ filetype: shell # * bash-fc. 
(followed by a random string) detect: filename: "(\\.(sh|bash|ash|ebuild)$|(\\.bash(rc|_aliases|_functions|_profile)|\\.?profile|Pkgfile|pkgmk\\.conf|rc\\.conf|PKGBUILD|APKBUILD)$|bash-fc\\.)" - header: "^#!.*/(env +)?(ba)?(a)?(mk)?sh( |$)" + signature: "^#!.*/(env +)?(ba)?(a)?(mk)?sh( |$)" rules: # Numbers diff --git a/runtime/syntax/syntax_converter.go b/runtime/syntax/syntax_converter.go index d2954e3a..f8af15dc 100644 --- a/runtime/syntax/syntax_converter.go +++ b/runtime/syntax/syntax_converter.go @@ -137,7 +137,7 @@ func generateFile(filetype, syntax, header string, rules []interface{}) string { output += fmt.Sprintf("detect: \n filename: \"%s\"\n", strings.Replace(strings.Replace(syntax, "\\", "\\\\", -1), "\"", "\\\"", -1)) if header != "" { - output += fmt.Sprintf(" header: \"%s\"\n", strings.Replace(strings.Replace(header, "\\", "\\\\", -1), "\"", "\\\"", -1)) + output += fmt.Sprintf(" signature: \"%s\"\n", strings.Replace(strings.Replace(header, "\\", "\\\\", -1), "\"", "\\\"", -1)) } output += "\nrules:\n" diff --git a/runtime/syntax/systemd.yaml b/runtime/syntax/systemd.yaml index a8650be4..9b668776 100644 --- a/runtime/syntax/systemd.yaml +++ b/runtime/syntax/systemd.yaml @@ -2,7 +2,7 @@ filetype: systemd detect: filename: "\\.(service|socket|timer)$" - header: "^\\[Unit\\]$" + signature: "^\\[Unit\\]$" rules: - statement: 
"^(Accept|After|Alias|AllowIsolate|Also|ANSI_COLOR|_AUDIT_LOGINUID|_AUDIT_SESSION|Backlog|Before|BindIPv6Only|BindsTo|BindToDevice|BlockIOReadBandwidth|BlockIOWeight|BlockIOWriteBandwidth|_BOOT_ID|Broadcast|BUG_REPORT_URL|BusName|Capabilities|CapabilityBoundingSet|CHASSIS|cipher|class|_CMDLINE|CODE_FILE|CODE_FUNC|CODE_LINE|_COMM|Compress|ConditionACPower|ConditionCapability|ConditionDirectoryNotEmpty|ConditionFileIsExecutable|ConditionFileNotEmpty|ConditionHost|ConditionKernelCommandLine|ConditionNull|ConditionPathExists|ConditionPathExistsGlob|ConditionPathIsDirectory|ConditionPathIsMountPoint|ConditionPathIsReadWrite|ConditionPathIsSymbolicLink|ConditionSecurity|ConditionVirtualization|Conflicts|ControlGroup|ControlGroupAttribute|ControlGroupModify|ControlGroupPersistent|controllers|Controllers|CPE_NAME|CPUAffinity|CPUSchedulingPolicy|CPUSchedulingPriority|CPUSchedulingResetOnFork|CPUShares|CrashChVT|CrashShell|__CURSOR|debug|DefaultControllers|DefaultDependencies|DefaultLimitAS|DefaultLimitCORE|DefaultLimitCPU|DefaultLimitDATA|DefaultLimitFSIZE|DefaultLimitLOCKS|DefaultLimitMEMLOCK|DefaultLimitMSGQUEUE|DefaultLimitNICE|DefaultLimitNOFILE|DefaultLimitNPROC|DefaultLimitRSS|DefaultLimitRTPRIO|DefaultLimitRTTIME|DefaultLimitSIGPENDING|DefaultLimitSTACK|DefaultStandardError|DefaultStandardOutput|Description|DeviceAllow|DeviceDeny|DirectoryMode|DirectoryNotEmpty|Documentation|DumpCore|entropy|Environment|EnvironmentFile|ERRNO|event_timeout|_EXE|ExecReload|ExecStart|ExecStartPost|ExecStartPre|ExecStop|ExecStopPost|ExecStopPre|filter|FONT|FONT_MAP|FONT_UNIMAP|ForwardToConsole|ForwardToKMsg|ForwardToSyslog|FreeBind|freq|FsckPassNo|fstab|_GID|Group|GuessMainPID|HandleHibernateKey|HandleLidSwitch|HandlePowerKey|HandleSuspendKey|hash|HibernateKeyIgnoreInhibited|HOME_URL|_HOSTNAME|ICON_NAME|ID|IdleAction|IdleActionSec|ID_LIKE|ID_MODEL|ID_MODEL_FROM_DATABASE|IgnoreOnIsolate|IgnoreOnSnapshot|IgnoreSIGPIPE|InaccessibleDirectories|InhibitDelayMaxSec|init|IOSchedulingClass|IOSched
ulingPriority|IPTOS|IPTTL|JobTimeoutSec|JoinControllers|KeepAlive|KEYMAP|KEYMAP_TOGGLE|KillExcludeUsers|KillMode|KillOnlyUsers|KillSignal|KillUserProcesses|LidSwitchIgnoreInhibited|LimitAS|LimitCORE|LimitCPU|LimitDATA|LimitFSIZE|LimitLOCKS|LimitMEMLOCK|LimitMSGQUEUE|LimitNICE|LimitNOFILE|LimitNPROC|LimitRSS|LimitRTPRIO|LimitRTTIME|LimitSIGPENDING|LimitSTACK|link_priority|valueListenDatagram|ListenFIFO|ListenMessageQueue|ListenNetlink|ListenSequentialPacket|ListenSpecial|ListenStream|LogColor|LogLevel|LogLocation|LogTarget|luks|_MACHINE_ID|MakeDirectory|Mark|MaxConnections|MaxFileSec|MaxLevelConsole|MaxLevelKMsg|MaxLevelStore|MaxLevelSyslog|MaxRetentionSec|MemoryLimit|MemorySoftLimit|MESSAGE|MESSAGE_ID|MessageQueueMaxMessages|MessageQueueMessageSize|__MONOTONIC_TIMESTAMP|MountFlags|NAME|NAutoVTs|Nice|NonBlocking|NoNewPrivileges|NotifyAccess|OnActiveSec|OnBootSec|OnCalendar|OnFailure|OnFailureIsolate|OnStartupSec|OnUnitActiveSec|OnUnitInactiveSec|OOMScoreAdjust|Options|output|PAMName|PartOf|PassCredentials|PassSecurity|PathChanged|PathExists|PathExistsGlob|PathModified|PermissionsStartOnly|_PID|PIDFile|PipeSize|PowerKeyIgnoreInhibited|PRETTY_HOSTNAME|PRETTY_NAME|Priority|PRIORITY|PrivateNetwork|PrivateTmp|PropagatesReloadTo|pss|RateLimitBurst|RateLimitInterval|ReadOnlyDirectories|ReadWriteDirectories|__REALTIME_TIMESTAMP|ReceiveBuffer|RefuseManualStart|RefuseManualStop|rel|ReloadPropagatedFrom|RemainAfterExit|RequiredBy|Requires|RequiresMountsFor|RequiresOverridable|Requisite|RequisiteOverridable|ReserveVT|ResetControllers|Restart|RestartPreventExitStatus|RestartSec|RootDirectory|RootDirectoryStartOnly|RuntimeKeepFree|RuntimeMaxFileSize|RuntimeMaxUse|RuntimeWatchdogSec|samples|scale_x|scale_y|Seal|SecureBits|_SELINUX_CONTEXT|SendBuffer|SendSIGKILL|Service|ShowStatus|ShutdownWatchdogSec|size|SmackLabel|SmackLabelIPIn|SmackLabelIPOut|SocketMode|Sockets|SourcePath|_SOURCE_REALTIME_TIMESTAMP|SplitMode|StandardError|StandardInput|StandardOutput|StartLimitAction|StartLimitB
urst|StartLimitInterval|static_node|StopWhenUnneeded|Storage|string_escape|none|replaceSuccessExitStatus|SupplementaryGroups|SUPPORT_URL|SuspendKeyIgnoreInhibited|SyslogFacility|SYSLOG_FACILITY|SyslogIdentifier|SYSLOG_IDENTIFIER|SyslogLevel|SyslogLevelPrefix|SYSLOG_PID|SystemCallFilter|SYSTEMD_ALIAS|_SYSTEMD_CGROUP|_SYSTEMD_OWNER_UID|SYSTEMD_READY|_SYSTEMD_SESSION|_SYSTEMD_UNIT|_SYSTEMD_USER_UNIT|SYSTEMD_WANTS|SystemKeepFree|SystemMaxFileSize|SystemMaxUse|SysVStartPriority|TCPCongestion|TCPWrapName|timeout|TimeoutSec|TimeoutStartSec|TimeoutStopSec|TimerSlackNSec|Transparent|_TRANSPORT|tries|TTYPath|TTYReset|TTYVHangup|TTYVTDisallocate|Type|_UID|UMask|Unit|User|UtmpIdentifier|VERSION|VERSION_ID|WantedBy|Wants|WatchdogSec|What|Where|WorkingDirectory)=" diff --git a/runtime/syntax/tcl.yaml b/runtime/syntax/tcl.yaml index b87a7d79..1b4ae7e5 100644 --- a/runtime/syntax/tcl.yaml +++ b/runtime/syntax/tcl.yaml @@ -2,7 +2,7 @@ filetype: tcl detect: filename: "\\.tcl$" - header: "^#!.*/(env +)?tclsh( |$)" + signature: "^#!.*/(env +)?tclsh( |$)" rules: - statement: "\\b(after|append|array|auto_execok|auto_import|auto_load|auto_load_index|auto_qualify|binary|break|case|catch|cd|clock|close|concat|continue|else|elseif|encoding|eof|error|eval|exec|exit|expr|fblocked|fconfigure|fcopy|file|fileevent|flush|for|foreach|format|gets|glob|global|history|if|incr|info|interp|join|lappend|lindex|linsert|list|llength|load|lrange|lreplace|lsearch|lset|lsort|namespace|open|package|pid|puts|pwd|read|regexp|regsub|rename|return|scan|seek|set|socket|source|split|string|subst|switch|tclLog|tell|time|trace|unknown|unset|update|uplevel|upvar|variable|vwait|while)\\b" diff --git a/runtime/syntax/xml.yaml b/runtime/syntax/xml.yaml index df4cde81..0e9b901e 100644 --- a/runtime/syntax/xml.yaml +++ b/runtime/syntax/xml.yaml @@ -2,7 +2,7 @@ filetype: xml detect: filename: "\\.(xml|sgml?|rng|svg|plist)$" - header: "<\\?xml.*\\?>" + signature: "<\\?xml.*\\?>" rules: - preproc: diff --git 
a/runtime/syntax/yaml.yaml b/runtime/syntax/yaml.yaml index 54d4a647..c21286e4 100644 --- a/runtime/syntax/yaml.yaml +++ b/runtime/syntax/yaml.yaml @@ -2,7 +2,7 @@ filetype: yaml detect: filename: "\\.ya?ml$" - header: "%YAML" + signature: "%YAML" rules: - type: "(^| )!!(binary|bool|float|int|map|null|omap|seq|set|str) " diff --git a/runtime/syntax/zsh.yaml b/runtime/syntax/zsh.yaml index a2832131..3b7e0593 100644 --- a/runtime/syntax/zsh.yaml +++ b/runtime/syntax/zsh.yaml @@ -2,7 +2,7 @@ filetype: zsh detect: filename: "(\\.zsh$|\\.?(zshenv|zprofile|zshrc|zlogin|zlogout)$)" - header: "^#!.*/(env +)?zsh( |$)" + signature: "^#!.*/(env +)?zsh( |$)" rules: ## Numbers From 93151f81093f02838e4592b5b35a92af4728f9e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6ran=20Karl?= <3951388+JoeKar@users.noreply.github.com> Date: Fri, 8 Sep 2023 21:45:00 +0200 Subject: [PATCH 2/5] syntax: Prepare a concrete signature example for objective C --- runtime/syntax/objc.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/runtime/syntax/objc.yaml b/runtime/syntax/objc.yaml index 223d6929..d4cccac3 100644 --- a/runtime/syntax/objc.yaml +++ b/runtime/syntax/objc.yaml @@ -2,6 +2,7 @@ filetype: objective-c detect: filename: "\\.(m|mm|h)$" + signature: "(obj|objective)-c|#import|@(encode|end|interface|implementation|selector|protocol|synchronized|try|catch|finally|property|optional|required|import|autoreleasepool)" rules: - type: "\\b(float|double|CGFloat|id|bool|BOOL|Boolean|char|int|short|long|sizeof|enum|void|static|const|struct|union|typedef|extern|(un)?signed|inline|Class|SEL|IMP|NS(U)?Integer)\\b" From 2aa386f4556e49853e5c889d1d104499354d175c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6ran=20Karl?= <3951388+JoeKar@users.noreply.github.com> Date: Thu, 26 Oct 2023 20:24:21 +0200 Subject: [PATCH 3/5] syntax: Prepare a concrete signature example for C++ --- runtime/syntax/cpp.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/runtime/syntax/cpp.yaml 
b/runtime/syntax/cpp.yaml index 3c97b2c4..e84c3cd4 100644 --- a/runtime/syntax/cpp.yaml +++ b/runtime/syntax/cpp.yaml @@ -1,7 +1,8 @@ filetype: c++ detect: - filename: "(\\.c(c|pp|xx)$|\\.h(h|pp|xx)$|\\.ii?$|\\.(def)$)" + filename: "(\\.c(c|pp|xx)$|\\.h(h|pp|xx)?$|\\.ii?$|\\.(def)$)" + signature: "namespace|template|public|protected|private" rules: - identifier: "\\b[A-Z_][0-9A-Z_]*\\b" From 2d0d0416e7725fd8db16dc93816269cce7ac797f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6ran=20Karl?= <3951388+JoeKar@users.noreply.github.com> Date: Fri, 8 Sep 2023 20:44:00 +0200 Subject: [PATCH 4/5] buffer: Prefer user defined over built-in file types --- internal/buffer/buffer.go | 42 ++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/internal/buffer/buffer.go b/internal/buffer/buffer.go index 3e4fc766..29dd823a 100644 --- a/internal/buffer/buffer.go +++ b/internal/buffer/buffer.go @@ -735,27 +735,29 @@ func (b *Buffer) UpdateRules() { } } - // search in the default syntax files - for _, f := range config.ListRuntimeFiles(config.RTSyntaxHeader) { - data, err := f.Data() - if err != nil { - screen.TermMessage("Error loading syntax header file " + f.Name() + ": " + err.Error()) - continue - } - - header, err = highlight.MakeHeader(data) - if err != nil { - screen.TermMessage("Error reading syntax header file", f.Name(), err) - continue - } - - if ft == "unknown" || ft == "" { - if header.MatchFileName(b.Path) { - syntaxFiles = append(syntaxFiles, syntaxFileBuffer{header, f.Name(), nil}) + if !foundDef { + // search in the default syntax files + for _, f := range config.ListRuntimeFiles(config.RTSyntaxHeader) { + data, err := f.Data() + if err != nil { + screen.TermMessage("Error loading syntax header file " + f.Name() + ": " + err.Error()) + continue + } + + header, err = highlight.MakeHeader(data) + if err != nil { + screen.TermMessage("Error reading syntax header file", f.Name(), err) + continue + } + + if ft == "unknown" 
|| ft == "" { + if header.MatchFileName(b.Path) { + syntaxFiles = append(syntaxFiles, syntaxFileBuffer{header, f.Name(), nil}) + } + } else if header.FileType == ft { + syntaxFile = f.Name() + break } - } else if header.FileType == ft { - syntaxFile = f.Name() - break } } From 3c16df87ee622ed2e526896957da98234e081dad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6ran=20Karl?= <3951388+JoeKar@users.noreply.github.com> Date: Tue, 6 Jun 2023 21:59:35 +0200 Subject: [PATCH 5/5] options: Add capability to define the line count parsed for the signature check --- internal/buffer/buffer.go | 8 +++++++- internal/config/settings.go | 2 ++ runtime/help/options.md | 7 +++++++ runtime/syntax/README.md | 2 +- 4 files changed, 17 insertions(+), 2 deletions(-) diff --git a/internal/buffer/buffer.go b/internal/buffer/buffer.go index 29dd823a..bd172d58 100644 --- a/internal/buffer/buffer.go +++ b/internal/buffer/buffer.go @@ -766,9 +766,15 @@ func (b *Buffer) UpdateRules() { if length > 0 { signatureMatch := false if length > 1 { + detectlimit := util.IntOpt(b.Settings["detectlimit"]) + lineCount := len(b.lines) + limit := lineCount + if detectlimit > 0 && lineCount > detectlimit { + limit = detectlimit + } for i := 0; i < length && !signatureMatch; i++ { if syntaxFiles[i].header.HasFileSignature() { - for j := 0; j < 100 && !signatureMatch; j++ { + for j := 0; j < limit && !signatureMatch; j++ { if syntaxFiles[i].header.MatchFileSignature(b.lines[j].data) { syntaxFile = syntaxFiles[i].fileName b.SyntaxDef = syntaxFiles[i].syntaxDef diff --git a/internal/config/settings.go b/internal/config/settings.go index 72e998f1..eca52074 100644 --- a/internal/config/settings.go +++ b/internal/config/settings.go @@ -44,6 +44,7 @@ func init() { var optionValidators = map[string]optionValidator{ "autosave": validateNonNegativeValue, "clipboard": validateClipboard, + "detectlimit": validateNonNegativeValue, "tabsize": validatePositiveValue, "scrollmargin": validateNonNegativeValue, "scrollspeed": 
validateNonNegativeValue, @@ -280,6 +281,7 @@ var defaultCommonSettings = map[string]interface{}{ "basename": false, "colorcolumn": float64(0), "cursorline": true, + "detectlimit": float64(100), "diffgutter": false, "encoding": "utf-8", "eofnewline": true, diff --git a/runtime/help/options.md b/runtime/help/options.md index 3170dc4c..376efdb5 100644 --- a/runtime/help/options.md +++ b/runtime/help/options.md @@ -100,6 +100,13 @@ Here are the available options: default value: `true` +* `detectlimit`: if this is not set to 0, it limits the number of lines at the + start of a file that are matched to determine the filetype. + A higher limit makes the filetype guess more accurate, but detection also + takes more time. + + default value: `100` + * `diffgutter`: display diff indicators before lines. default value: `false` diff --git a/runtime/syntax/README.md b/runtime/syntax/README.md index e97a4f77..5bcbf138 100644 --- a/runtime/syntax/README.md +++ b/runtime/syntax/README.md @@ -2,7 +2,7 @@ Here are micro's syntax files. -Each yaml file specifies how to detect the filetype based on file extension or given signature. The signature can be matched to a maximum of 100 lines (to limit parse times) for a best "guess". +Each yaml file specifies how to detect the filetype based on the file extension or a given signature. The signature is matched against all lines of the file, or only against the first lines up to the `detectlimit` option (to limit parse times), for a best "guess". Then there are patterns and regions linked to highlight groups which tell micro how to highlight that filetype. Making your own syntax files is very simple. I recommend you check the file after you are finished with the