From f2fa93ad8bf4eacd3cda82d638a0f63adfd2c1ba Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sat, 6 Jul 2024 23:33:47 +0100 Subject: [PATCH 1/8] posts: add gap-buffer --- content/posts/2024-07-06-gap-buffer/index.md | 25 ++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 content/posts/2024-07-06-gap-buffer/index.md diff --git a/content/posts/2024-07-06-gap-buffer/index.md b/content/posts/2024-07-06-gap-buffer/index.md new file mode 100644 index 0000000..fa8f1c5 --- /dev/null +++ b/content/posts/2024-07-06-gap-buffer/index.md @@ -0,0 +1,25 @@ +--- +title: "Gap Buffer" +date: 2024-07-06T21:27:19+01:00 +draft: false # I don't care for draft mode, git has branches for that +description: "As featured in GNU Emacs" +tags: + - algorithms + - data structures + - python +categories: + - programming +series: + - Cool algorithms +favorite: false +disable_feed: false +--- + +The [_Gap Buffer_][wiki] is a popular data structure for text editors to +represent files and editable buffers. The most famous of them is probably +[GNU Emacs][emacs]. + +[wiki]: https://en.wikipedia.org/wiki/Gap_buffer +[emacs]: https://www.gnu.org/software/emacs/manual/html_node/elisp/Buffer-Gap.html + + From 51a1bd01cd98daca2c3f374d7d2cbe96f88e4c0c Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sat, 6 Jul 2024 23:34:49 +0100 Subject: [PATCH 2/8] posts: gap-buffer: add presentation --- content/posts/2024-07-06-gap-buffer/index.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/content/posts/2024-07-06-gap-buffer/index.md b/content/posts/2024-07-06-gap-buffer/index.md index fa8f1c5..d13ef4e 100644 --- a/content/posts/2024-07-06-gap-buffer/index.md +++ b/content/posts/2024-07-06-gap-buffer/index.md @@ -23,3 +23,13 @@ represent files and editable buffers. The most famous of them is probably [emacs]: https://www.gnu.org/software/emacs/manual/html_node/elisp/Buffer-Gap.html + +## What does it do? 
+ +A _Gap Buffer_ is simply a list of characters, similar to a normal string, with +the added twist of splitting it into two sides: the prefix and suffix, on either +side of the cursor. In between them, a gap is left to allow for quick +insertion at the cursor. + +Moving the cursor moves the gap around the buffer, the prefix and suffix getting +shorter/longer as required. From a9f003f4ee168705762a503fa7762059e6c16ec5 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sat, 6 Jul 2024 23:35:39 +0100 Subject: [PATCH 3/8] posts: gap-buffer: add construction --- content/posts/2024-07-06-gap-buffer/index.md | 39 ++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/content/posts/2024-07-06-gap-buffer/index.md b/content/posts/2024-07-06-gap-buffer/index.md index d13ef4e..db5d92b 100644 --- a/content/posts/2024-07-06-gap-buffer/index.md +++ b/content/posts/2024-07-06-gap-buffer/index.md @@ -33,3 +33,42 @@ insertion at the cursor. Moving the cursor moves the gap around the buffer, the prefix and suffix getting shorter/longer as required. + +## Implementation + +I'll be writing a sample implementation in Python, as with the rest of the +[series]({{< ref "/series/cool-algorithms/">}}). I don't think it showcases the +elegance of the _Gap Buffer_ in action like a C implementation full of +`memmove`s would, but it does make it short and sweet. + +### Representation + +We'll be representing the gap buffer as an actual list of characters. + +Given that Python doesn't _have_ characters, let's settle for a list of strings, +each representing a single character... + +```python +Char = str + +class GapBuffer: + # List of characters, contains prefix and suffix of string with gap in the middle + _buf: list[Char] + # The gap is contained between [start, end) (i.e: buf[start:end]) + _gap_start: int + _gap_end: int + + # Visual representation of the gap buffer: + # This is a very [ ]long string. 
+ # |<----------------------------------------------->| capacity + # |<------------>| |<-------->| string + # |<------------------->| gap + # |<------------>| prefix + # |<-------->| suffix + def __init__(self, initial_capacity: int = 16) -> None: + assert initial_capacity > 0 + # Initialize an empty gap buffer + self._buf = [""] * initial_capacity + self._gap_start = 0 + self._gap_end = initial_capacity +``` From 408b74daf7d0d43907aa33eb1aea3bd961200ab7 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sat, 6 Jul 2024 23:36:02 +0100 Subject: [PATCH 4/8] posts: gap-buffer: add accessors --- content/posts/2024-07-06-gap-buffer/index.md | 27 ++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/content/posts/2024-07-06-gap-buffer/index.md b/content/posts/2024-07-06-gap-buffer/index.md index db5d92b..1071a24 100644 --- a/content/posts/2024-07-06-gap-buffer/index.md +++ b/content/posts/2024-07-06-gap-buffer/index.md @@ -72,3 +72,30 @@ class GapBuffer: self._gap_start = 0 self._gap_end = initial_capacity ``` + +### Accessors + +I'm mostly adding these for exposition, and making it easier to write `assert`s +later. 
+ +```python +@property +def capacity(self) -> int: + return len(self._buf) + +@property +def gap_length(self) -> int: + return self._gap_end - self._gap_start + +@property +def string_length(self) -> int: + return self.capacity - self.gap_length + +@property +def prefix_length(self) -> int: + return self._gap_start + +@property +def suffix_length(self) -> int: + return self.capacity - self._gap_end +``` From 06c4a03a42b2e61684830fb115270fd152c17cfe Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sat, 6 Jul 2024 23:36:20 +0100 Subject: [PATCH 5/8] posts: gap-buffer: add growth --- content/posts/2024-07-06-gap-buffer/index.md | 22 ++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/content/posts/2024-07-06-gap-buffer/index.md b/content/posts/2024-07-06-gap-buffer/index.md index 1071a24..5e5cd4c 100644 --- a/content/posts/2024-07-06-gap-buffer/index.md +++ b/content/posts/2024-07-06-gap-buffer/index.md @@ -99,3 +99,25 @@ def prefix_length(self) -> int: def suffix_length(self) -> int: return self.capacity - self._gap_end ``` + +### Growing the buffer + +I've written this method in a somewhat non-idiomatic manner, to make it closer +to how it would look in C using `realloc` instead. + +It would be more efficient to use slicing to insert the needed extra capacity +directly, instead of making a new buffer and copying characters over. 
+ +```python +def grow(self, capacity: int) -> None: + assert capacity >= self.capacity + # Create a new buffer with the new capacity + new_buf = [""] * capacity + # Move the prefix/suffix to their place in the new buffer + added_capacity = capacity - len(self._buf) + new_buf[: self._gap_start] = self._buf[: self._gap_start] + new_buf[self._gap_end + added_capacity :] = self._buf[self._gap_end :] + # Use the new buffer, account for added capacity + self._buf = new_buf + self._gap_end += added_capacity +``` From dbbcd528c3e5292cdd3548ee22c0a15408344079 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sat, 6 Jul 2024 23:36:33 +0100 Subject: [PATCH 6/8] posts: gap-buffer: add insertion --- content/posts/2024-07-06-gap-buffer/index.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/content/posts/2024-07-06-gap-buffer/index.md b/content/posts/2024-07-06-gap-buffer/index.md index 5e5cd4c..8c13eb1 100644 --- a/content/posts/2024-07-06-gap-buffer/index.md +++ b/content/posts/2024-07-06-gap-buffer/index.md @@ -121,3 +121,22 @@ def grow(self, capacity: int) -> None: self._buf = new_buf self._gap_end += added_capacity ``` + +### Insertion + +Inserting text at the cursor's position means filling up the gap in the middle +of the buffer. To do so we must first make sure that the gap is big enough, or +grow the buffer accordingly. + +Then inserting the text is simply a matter of copying its characters in place, +and moving the start of the gap further right. 
+ +```python +def insert(self, val: str) -> None: + # Ensure we have enough space to insert the whole string + if len(val) > self.gap_length: + self.grow(max(self.capacity * 2, self.string_length + len(val))) + # Fill the gap with the given string + self._buf[self._gap_start : self._gap_start + len(val)] = val + self._gap_start += len(val) +``` From 11138dafd16fd8eae9d0b6f2764faa22d80e9100 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sat, 6 Jul 2024 23:36:46 +0100 Subject: [PATCH 7/8] posts: gap-buffer: add deletion --- content/posts/2024-07-06-gap-buffer/index.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/content/posts/2024-07-06-gap-buffer/index.md b/content/posts/2024-07-06-gap-buffer/index.md index 8c13eb1..929955e 100644 --- a/content/posts/2024-07-06-gap-buffer/index.md +++ b/content/posts/2024-07-06-gap-buffer/index.md @@ -140,3 +140,22 @@ def insert(self, val: str) -> None: self._buf[self._gap_start : self._gap_start + len(val)] = val self._gap_start += len(val) ``` + +### Deletion + +Removing text from the buffer simply expands the gap in the corresponding +direction, shortening the string's prefix/suffix. This makes it very cheap. + +The methods are named after the `backspace` and `delete` keys on the keyboard. 
+ +```python +def backspace(self, dist: int = 1) -> None: + assert dist <= self.prefix_length + # Extend gap to the left + self._gap_start -= dist + +def delete(self, dist: int = 1) -> None: + assert dist <= self.suffix_length + # Extend gap to the right + self._gap_end += dist +``` From e8acb49b53d3dc0e17d606c38f699bfdae7a4edf Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sat, 6 Jul 2024 23:41:31 +0100 Subject: [PATCH 8/8] posts: gap-buffer: add movement --- content/posts/2024-07-06-gap-buffer/index.md | 30 ++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/content/posts/2024-07-06-gap-buffer/index.md b/content/posts/2024-07-06-gap-buffer/index.md index 929955e..a9aac96 100644 --- a/content/posts/2024-07-06-gap-buffer/index.md +++ b/content/posts/2024-07-06-gap-buffer/index.md @@ -159,3 +159,33 @@ def delete(self, dist: int = 1) -> None: # Extend gap to the right self._gap_end += dist ``` + +### Moving the cursor + +Moving the cursor along the buffer will shift letters from one side of the gap +to the other, moving them across from prefix to suffix and back. + +I find Python's list slicing not quite as elegant to read as a `memmove`, though +it does make for a very small and efficient implementation. + +```python +def left(self, dist: int = 1) -> None: + assert dist <= self.prefix_length + # Shift the needed number of characters from end of prefix to start of suffix + self._buf[self._gap_end - dist : self._gap_end] = self._buf[ + self._gap_start - dist : self._gap_start + ] + # Adjust indices accordingly + self._gap_start -= dist + self._gap_end -= dist + +def right(self, dist: int = 1) -> None: + assert dist <= self.suffix_length + # Shift the needed number of characters from start of suffix to end of prefix + self._buf[self._gap_start : self._gap_start + dist] = self._buf[ + self._gap_end : self._gap_end + dist + ] + # Adjust indices accordingly + self._gap_start += dist + self._gap_end += dist +```