From e0f5361d97a77806afde6a0887d0cb2c09ab8508 Mon Sep 17 00:00:00 2001 From: Dmytro Maluka Date: Sat, 17 Aug 2024 16:44:53 +0200 Subject: [PATCH 1/7] calcHash: remove unneeded h.Write() error checks According to the Go hash package documentation [1]: type Hash interface { // Write (via the embedded io.Writer interface) adds more data to the running hash. // It never returns an error. io.Writer [1] https://pkg.go.dev/hash#Hash --- internal/buffer/buffer.go | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/internal/buffer/buffer.go b/internal/buffer/buffer.go index 661ea8b5..a7aca541 100644 --- a/internal/buffer/buffer.go +++ b/internal/buffer/buffer.go @@ -654,22 +654,13 @@ func calcHash(b *Buffer, out *[md5.Size]byte) error { size := 0 if len(b.lines) > 0 { - n, e := h.Write(b.lines[0].data) - if e != nil { - return e - } + n, _ := h.Write(b.lines[0].data) size += n for _, l := range b.lines[1:] { - n, e = h.Write([]byte{'\n'}) - if e != nil { - return e - } + n, _ = h.Write([]byte{'\n'}) size += n - n, e = h.Write(l.data) - if e != nil { - return e - } + n, _ = h.Write(l.data) size += n } } From c0f6b65ed628bd86946e49d77ca84397ba3e40a5 Mon Sep 17 00:00:00 2001 From: Dmytro Maluka Date: Sat, 17 Aug 2024 16:56:15 +0200 Subject: [PATCH 2/7] calcHash: use correct line endings Make calcHash() respect the buffer's file endings (unix vs dos), to make its calculation of the file size consistent with how we calculate it in other cases (i.e. when opening or saving the file) and with the `fastdirty` option documentation, i.e. make calcHash() return ErrFileTooLarge if and only if the exact file size exceeds 50KB. --- internal/buffer/buffer.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/internal/buffer/buffer.go b/internal/buffer/buffer.go index a7aca541..c1bf6f10 100644 --- a/internal/buffer/buffer.go +++ b/internal/buffer/buffer.go @@ -658,7 +658,11 @@ func calcHash(b *Buffer, out *[md5.Size]byte) error { size += n for _, l := range b.lines[1:] { - n, _ = h.Write([]byte{'\n'}) + if b.Endings == FFDos { + n, _ = h.Write([]byte{'\r', '\n'}) + } else { + n, _ = h.Write([]byte{'\n'}) + } size += n n, _ = h.Write(l.data) size += n From 352fd2be223bd1b12adb8abf3dc74651c89f9394 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6ran=20Karl?= <3951388+JoeKar@users.noreply.github.com> Date: Sun, 21 Jul 2024 21:20:29 +0200 Subject: [PATCH 3/7] buffer/settings: On `fastdirty` off set to on in case of "large file" This behavior is then aligned to the actual documentation of `fastdirty`. Additionally set the origHash to zero in case the buffer was already modified. --- internal/buffer/settings.go | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/internal/buffer/settings.go b/internal/buffer/settings.go index aa011240..0af90ff8 100644 --- a/internal/buffer/settings.go +++ b/internal/buffer/settings.go @@ -1,6 +1,8 @@ package buffer import ( + "crypto/md5" + "github.com/zyedidia/micro/v2/internal/config" "github.com/zyedidia/micro/v2/internal/screen" ) @@ -13,7 +15,12 @@ func (b *Buffer) SetOptionNative(option string, nativeValue interface{}) error { if !b.Modified() { e := calcHash(b, &b.origHash) if e == ErrFileTooLarge { - b.Settings["fastdirty"] = false + b.Settings["fastdirty"] = true + } + } else { + b.origHash = [md5.Size]byte{} + if b.Size() > LargeFileThreshold { + b.Settings["fastdirty"] = true } } } From 93efc9eabe23adec1a33049470dced484f0d1630 Mon Sep 17 00:00:00 2001 From: Dmytro Maluka Date: Sun, 18 Aug 2024 14:40:31 +0200 Subject: [PATCH 4/7] buffer/settings: Don't use Modified() before we updated origHash When we have already enabled `fastdirty` but have not updated origHash yet, we shouldn't use Modified() since it depends on origHash which is still outdated, and thus returns wrong values. This fixes the following issue: enable `fastdirty`, modify the buffer, save the buffer and disable `fastdirty` -> micro wrongly reports the buffer as modified (whereas it has just been saved). Note that this fix, though, also causes a regression: e.g. if we run `set fastdirty false` while fastdirty is already disabled, micro may unexpectedly report a non-modified buffer as modified (in the case if isModified is true but the buffer it actually not modified, since its md5 sum matches and fastdirty is disabled), since this fix assumes that since we are disabling fastdirty, it has been enabled. This shall be fixed by PR #3343 which makes `set` do nothing if the option value doesn't change. --- internal/buffer/settings.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/buffer/settings.go b/internal/buffer/settings.go index 0af90ff8..e934da2f 100644 --- a/internal/buffer/settings.go +++ b/internal/buffer/settings.go @@ -12,7 +12,7 @@ func (b *Buffer) SetOptionNative(option string, nativeValue interface{}) error { if option == "fastdirty" { if !nativeValue.(bool) { - if !b.Modified() { + if !b.isModified { e := calcHash(b, &b.origHash) if e == ErrFileTooLarge { b.Settings["fastdirty"] = true From 7fe98ccfee20daf38b49c358f1d29f7df77dd7b4 Mon Sep 17 00:00:00 2001 From: Dmytro Maluka Date: Sun, 18 Aug 2024 15:10:07 +0200 Subject: [PATCH 5/7] calcHash: Remove checking file size Let calcHash() unconditionally hash whatever buffer it is asked to hash, and let its callers explicitly check if the buffer is too large before calling calcHash(). This makes things simpler and less error-prone (no extra source of truth about whether the file is too large, we don't need to remember to check if calcHash() fails, we can be sure calcHash() will actually update the provided hash), and actually faster (since just calculating the buffer size, i.e. adding line lengths, is faster than md5 calculation). In particular, this fixes the following bugs: 1. Since ReOpen() doesn't check calcHash() return value, if the reloaded file is too large while the old version of the file is not, calcHash() returns ErrFileTooLarge and doesn't update origHash, so so Modified() returns true since the reloaded file's md5 sum doesn't match the old origHash, so micro wrongly reports the newly reloaded file as modified. 2. Since Modified() doesn't check calcHash() return value, Modified() may return false positives or false negatives if the buffer has *just* become too large so calcHash() returns ErrFileTooLarge and doesn't update `buff`. --- internal/buffer/buffer.go | 23 +++++------------------ internal/buffer/settings.go | 14 ++++++-------- 2 files changed, 11 insertions(+), 26 deletions(-) diff --git a/internal/buffer/buffer.go b/internal/buffer/buffer.go index c1bf6f10..1d899663 100644 --- a/internal/buffer/buffer.go +++ b/internal/buffer/buffer.go @@ -64,10 +64,6 @@ var ( // BTStdout is a buffer that only writes to stdout // when closed BTStdout = BufType{6, false, true, true} - - // ErrFileTooLarge is returned when the file is too large to hash - // (fastdirty is automatically enabled) - ErrFileTooLarge = errors.New("File is too large to hash") ) // SharedBuffer is a struct containing info that is shared among buffers @@ -649,32 +645,23 @@ func (b *Buffer) Size() int { } // calcHash calculates md5 hash of all lines in the buffer -func calcHash(b *Buffer, out *[md5.Size]byte) error { +func calcHash(b *Buffer, out *[md5.Size]byte) { h := md5.New() - size := 0 if len(b.lines) > 0 { - n, _ := h.Write(b.lines[0].data) - size += n + h.Write(b.lines[0].data) for _, l := range b.lines[1:] { if b.Endings == FFDos { - n, _ = h.Write([]byte{'\r', '\n'}) + h.Write([]byte{'\r', '\n'}) } else { - n, _ = h.Write([]byte{'\n'}) + h.Write([]byte{'\n'}) } - size += n - n, _ = h.Write(l.data) - size += n + h.Write(l.data) } } - if size > LargeFileThreshold { - return ErrFileTooLarge - } - h.Sum((*out)[:0]) - return nil } func parseDefFromFile(f config.RuntimeFile, header *highlight.Header) *highlight.Def { diff --git a/internal/buffer/settings.go b/internal/buffer/settings.go index e934da2f..9e76697c 100644 --- a/internal/buffer/settings.go +++ b/internal/buffer/settings.go @@ -12,15 +12,13 @@ func (b *Buffer) SetOptionNative(option string, nativeValue interface{}) error { if option == "fastdirty" { if !nativeValue.(bool) { - if !b.isModified { - e := calcHash(b, &b.origHash) - if e == ErrFileTooLarge { - b.Settings["fastdirty"] = true - } + if b.Size() > LargeFileThreshold { + b.Settings["fastdirty"] = true } else { - b.origHash = [md5.Size]byte{} - if b.Size() > LargeFileThreshold { - b.Settings["fastdirty"] = true + if !b.isModified { + calcHash(b, &b.origHash) + } else { + b.origHash = [md5.Size]byte{} } } } From d31095fe8f7e8a6fa2750c0b4480761de93c66b9 Mon Sep 17 00:00:00 2001 From: Dmytro Maluka Date: Sun, 18 Aug 2024 15:19:19 +0200 Subject: [PATCH 6/7] buffer/settings: Add comment why do we need to zero origHash --- internal/buffer/settings.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/buffer/settings.go b/internal/buffer/settings.go index 9e76697c..9d671dcf 100644 --- a/internal/buffer/settings.go +++ b/internal/buffer/settings.go @@ -18,6 +18,7 @@ func (b *Buffer) SetOptionNative(option string, nativeValue interface{}) error { if !b.isModified { calcHash(b, &b.origHash) } else { + // prevent using an old stale origHash value b.origHash = [md5.Size]byte{} } } From 0b15b57e63fa6ecb3eabb24db1bcf8667f9e4c0e Mon Sep 17 00:00:00 2001 From: Dmytro Maluka Date: Sun, 18 Aug 2024 15:33:35 +0200 Subject: [PATCH 7/7] buffer: Set fastdirty=true for large file when reopening Similarly to how we force `fastdirty` to true when opening a large file (when creating the buffer), force it also when reopening a file, in case the file on disk became large since we opened it. --- internal/buffer/buffer.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/internal/buffer/buffer.go b/internal/buffer/buffer.go index 1d899663..3073eba0 100644 --- a/internal/buffer/buffer.go +++ b/internal/buffer/buffer.go @@ -550,7 +550,11 @@ func (b *Buffer) ReOpen() error { err = b.UpdateModTime() if !b.Settings["fastdirty"].(bool) { - calcHash(b, &b.origHash) + if len(data) > LargeFileThreshold { + b.Settings["fastdirty"] = true + } else { + calcHash(b, &b.origHash) + } } b.isModified = false b.RelocateCursors()