From 39e410aa46c73a51707b4c8cdd8b5bb702665d6d Mon Sep 17 00:00:00 2001
From: Dmytro Maluka
Date: Sat, 23 Mar 2024 21:24:44 +0100
Subject: [PATCH] UpdateRules: reintroduce using header regex for filetype detection

Replacing header patterns with signature patterns was a mistake, since
the two are quite different from each other and each has its uses.
In fact, this caused a serious regression: for files such as shell
scripts without a *.sh extension but with #!/bin/sh inside, filetype
detection no longer works at all.

Since both header and signature patterns are useful, reintroduce support
for header patterns while keeping support for signature patterns, and
make the two work nicely together.

Also, unlike in the old implementation (before signatures were
introduced), ensure that filename matches take precedence over header
matches, i.e. if at least one filename match is found, all header
matches are ignored. This makes the behavior more deterministic and
prevents previously observed issues like #2894 and #3054: wrongly
detected filetypes caused by some overly general header patterns.

More precisely, the new behavior is:

1. if there is at least one filename match, use filename matches only
2. if there are no filename matches, use header matches
3. 
in both cases, try to use signatures to find the best match among multiple filename or header matches --- internal/buffer/buffer.go | 51 +++++++++++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 13 deletions(-) diff --git a/internal/buffer/buffer.go b/internal/buffer/buffer.go index 228af183..0f21f188 100644 --- a/internal/buffer/buffer.go +++ b/internal/buffer/buffer.go @@ -702,6 +702,7 @@ func (b *Buffer) UpdateRules() { } fnameMatches := []syntaxFileBuffer{} + headerMatches := []syntaxFileBuffer{} syntaxFile := "" foundDef := false var header *highlight.Header @@ -719,7 +720,19 @@ func (b *Buffer) UpdateRules() { continue } - if ((ft == "unknown" || ft == "") && header.MatchFileName(b.Path)) || header.FileType == ft { + matchedFileName := false + matchedFileHeader := false + + if ft == "unknown" || ft == "" { + if header.MatchFileName(b.Path) { + matchedFileName = true + } + if len(fnameMatches) == 0 && header.MatchFileHeader(b.lines[0].data) { + matchedFileHeader = true + } + } + + if matchedFileName || matchedFileHeader || header.FileType == ft { file, err := highlight.ParseFile(data) if err != nil { screen.TermMessage("Error parsing syntax file " + f.Name() + ": " + err.Error()) @@ -737,8 +750,12 @@ func (b *Buffer) UpdateRules() { syntaxFile = f.Name() foundDef = true break - } else { + } + + if matchedFileName { fnameMatches = append(fnameMatches, syntaxFileBuffer{header, f.Name(), syndef}) + } else if matchedFileHeader { + headerMatches = append(headerMatches, syntaxFileBuffer{header, f.Name(), syndef}) } } } @@ -762,6 +779,9 @@ func (b *Buffer) UpdateRules() { if header.MatchFileName(b.Path) { fnameMatches = append(fnameMatches, syntaxFileBuffer{header, f.Name(), nil}) } + if len(fnameMatches) == 0 && header.MatchFileHeader(b.lines[0].data) { + headerMatches = append(headerMatches, syntaxFileBuffer{header, f.Name(), nil}) + } } else if header.FileType == ft { syntaxFile = f.Name() break @@ -770,7 +790,12 @@ func (b *Buffer) UpdateRules() { 
} if syntaxFile == "" { - length := len(fnameMatches) + matches := fnameMatches + if len(matches) == 0 { + matches = headerMatches + } + + length := len(matches) if length > 0 { signatureMatch := false if length > 1 { @@ -781,15 +806,15 @@ func (b *Buffer) UpdateRules() { limit = detectlimit } for i := 0; i < length && !signatureMatch; i++ { - if fnameMatches[i].header.HasFileSignature() { + if matches[i].header.HasFileSignature() { for j := 0; j < limit && !signatureMatch; j++ { - if fnameMatches[i].header.MatchFileSignature(b.lines[j].data) { - syntaxFile = fnameMatches[i].fileName - if fnameMatches[i].syntaxDef != nil { - b.SyntaxDef = fnameMatches[i].syntaxDef + if matches[i].header.MatchFileSignature(b.lines[j].data) { + syntaxFile = matches[i].fileName + if matches[i].syntaxDef != nil { + b.SyntaxDef = matches[i].syntaxDef foundDef = true } - header = fnameMatches[i].header + header = matches[i].header signatureMatch = true } } @@ -797,12 +822,12 @@ func (b *Buffer) UpdateRules() { } } if length == 1 || !signatureMatch { - syntaxFile = fnameMatches[0].fileName - if fnameMatches[0].syntaxDef != nil { - b.SyntaxDef = fnameMatches[0].syntaxDef + syntaxFile = matches[0].fileName + if matches[0].syntaxDef != nil { + b.SyntaxDef = matches[0].syntaxDef foundDef = true } - header = fnameMatches[0].header + header = matches[0].header } } }