From 55982909d2ce8b7f3cc67de3d97f1982316ecc41 Mon Sep 17 00:00:00 2001
From: Bruno BELANYI <bruno@belanyi.fr>
Date: Sun, 30 Jun 2024 12:37:21 +0100
Subject: [PATCH 01/16] posts: trie: add insertion

---
 content/posts/2024-06-30-trie/index.md | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/content/posts/2024-06-30-trie/index.md b/content/posts/2024-06-30-trie/index.md
index 968aa0e..b8e4679 100644
--- a/content/posts/2024-06-30-trie/index.md
+++ b/content/posts/2024-06-30-trie/index.md
@@ -78,3 +78,22 @@ def get(self, key: str) -> T | None:
     # Otherwise, recurse on the child corresponding to the first letter
     return self._children[key[0]].get(key[1:])
 ```
+
+### Insertion
+
+Adding a new value to the _Trie_ is similar to a key lookup, only this time we
+store the new value instead of returning it.
+
+```python
+def insert(self, key: str, value: T) -> bool:
+    # Have we matched the full key?
+    if not key:
+        # Check whether we're overwriting a previous mapping
+        was_mapped = self._value is not None
+        # Store the corresponding value
+        self._value = value
+        # Return whether we've performed an overwrite
+        return was_mapped
+      # Otherwise, recurse on the child corresponding to the first letter
+      return self._children[key[0]].insert(key[1:], value)
+```

From 239d5c3dbdf5cc59014971da326654d1b76fd821 Mon Sep 17 00:00:00 2001
From: Bruno BELANYI <bruno@belanyi.fr>
Date: Sat, 6 Jul 2024 23:33:47 +0100
Subject: [PATCH 02/16] posts: add gap-buffer

---
 content/posts/2024-07-06-gap-buffer/index.md | 25 ++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 content/posts/2024-07-06-gap-buffer/index.md

diff --git a/content/posts/2024-07-06-gap-buffer/index.md b/content/posts/2024-07-06-gap-buffer/index.md
new file mode 100644
index 0000000..43b992f
--- /dev/null
+++ b/content/posts/2024-07-06-gap-buffer/index.md
@@ -0,0 +1,25 @@
+---
+title: "Gap Buffer"
+date: 2024-07-06T21:27:19+01:00
+draft: false # I don't care for draft mode, git has branches for that
+description: "As featured in GNU Emacs"
+tags:
+- algorithms
+- data structures
+- python
+categories:
+- programming
+series:
+- Cool algorithms
+favorite: false
+disable_feed: false
+---
+
+The [_Gap Buffer_][wiki] is a popular data structure for text editors to
+represent files and editable buffers. The most famous of them probably being
+[GNU Emacs][emacs].
+
+[wiki]: https://en.wikipedia.org/wiki/Gap_buffer
+[emacs]: https://www.gnu.org/software/emacs/manual/html_node/elisp/Buffer-Gap.html
+
+<!--more-->

From a4976aeefb40da97f384dacbad249ea3d29e330f Mon Sep 17 00:00:00 2001
From: Bruno BELANYI <bruno@belanyi.fr>
Date: Sat, 6 Jul 2024 23:34:49 +0100
Subject: [PATCH 03/16] posts: gap-buffer: add presentation

---
 content/posts/2024-07-06-gap-buffer/index.md | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/content/posts/2024-07-06-gap-buffer/index.md b/content/posts/2024-07-06-gap-buffer/index.md
index 43b992f..44d699f 100644
--- a/content/posts/2024-07-06-gap-buffer/index.md
+++ b/content/posts/2024-07-06-gap-buffer/index.md
@@ -23,3 +23,13 @@ represent files and editable buffers. The most famous of them probably being
 [emacs]: https://www.gnu.org/software/emacs/manual/html_node/elisp/Buffer-Gap.html
 
 <!--more-->
+
+## What does it do?
+
+A _Gap Buffer_ is simply a list of characters, similar to a normal string, with
+the added twist of splitting it into two sides: the prefix and suffix, on either
+side of the cursor. In between them, a gap is left to allow for quick
+insertion at the cursor.
+
+Moving the cursor moves the gap around the buffer, the prefix and suffix getting
+shorter/longer as required.

From 091e8527e3aae666b4a03b2cef919451d84e1c68 Mon Sep 17 00:00:00 2001
From: Bruno BELANYI <bruno@belanyi.fr>
Date: Sat, 6 Jul 2024 23:35:39 +0100
Subject: [PATCH 04/16] posts: gap-buffer: add construction

---
 content/posts/2024-07-06-gap-buffer/index.md | 39 ++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/content/posts/2024-07-06-gap-buffer/index.md b/content/posts/2024-07-06-gap-buffer/index.md
index 44d699f..2b02dc4 100644
--- a/content/posts/2024-07-06-gap-buffer/index.md
+++ b/content/posts/2024-07-06-gap-buffer/index.md
@@ -33,3 +33,42 @@ insertion at the cursor.
 
 Moving the cursor moves the gap around the buffer, the prefix and suffix getting
 shorter/longer as required.
+
+## Implementation
+
+I'll be writing a sample implementation in Python, as with the rest of the
+[series]({{< ref "/series/cool-algorithms/">}}). I don't think it showcases the
+elegance of the _Gap Buffer_ in action like a C implementation full of
+`memmove`s would, but it does make it short and sweet.
+
+### Representation
+
+We'll be representing the gap buffer as an actual list of characters.
+
+Given that Python doesn't _have_ characters, let's settle for a list of strings,
+each representing a single character...
+
+```python
+Char = str
+
+class GapBuffer:
+    # List of characters, contains prefix and suffix of string with gap in the middle
+    _buf: list[Char]
+    # The gap is contained between [start, end) (i.e: buf[start:end])
+    _gap_start: int
+    _gap_end: int
+
+    # Visual representation of the gap buffer:
+    # This is a very  [                     ]long string.
+    # |<----------------------------------------------->| capacity
+    # |<------------>|                       |<-------->| string
+    #                 |<------------------->|             gap
+    # |<------------>|                                    prefix
+    #                                        |<-------->| suffix
+    def __init__(self, initial_capacity: int = 16) -> None:
+        assert initial_capacity > 0
+        # Initialize an empty gap buffer
+        self._buf = [""] * initial_capacity
+        self._gap_start = 0
+        self._gap_end = initial_capacity
+```

From 4d69be06334bee81a10625b215d0b97c206b44c3 Mon Sep 17 00:00:00 2001
From: Bruno BELANYI <bruno@belanyi.fr>
Date: Sat, 6 Jul 2024 23:36:02 +0100
Subject: [PATCH 05/16] posts: gap-buffer: add accessors

---
 content/posts/2024-07-06-gap-buffer/index.md | 27 ++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/content/posts/2024-07-06-gap-buffer/index.md b/content/posts/2024-07-06-gap-buffer/index.md
index 2b02dc4..a90e8a4 100644
--- a/content/posts/2024-07-06-gap-buffer/index.md
+++ b/content/posts/2024-07-06-gap-buffer/index.md
@@ -72,3 +72,30 @@ class GapBuffer:
         self._gap_start = 0
         self._gap_end = initial_capacity
 ```
+
+### Accessors
+
+I'm mostly adding these for exposition, and making it easier to write `assert`s
+later.
+
+```python
+@property
+def capacity(self) -> int:
+  return len(self._buf)
+
+@property
+def gap_length(self) -> int:
+  return self._gap_end - self._gap_start
+
+@property
+def string_length(self) -> int:
+  return self.capacity - self.gap_length
+
+@property
+def prefix_length(self) -> int:
+  return self._gap_start
+
+@property
+def suffix_length(self) -> int:
+  return self.capacity - self._gap_end
+```

From f4a64b2a37a75f81438925f0598204a052afd2f8 Mon Sep 17 00:00:00 2001
From: Bruno BELANYI <bruno@belanyi.fr>
Date: Sun, 14 Jul 2024 17:53:25 +0100
Subject: [PATCH 06/16] posts: add bloom-filter

---
 .../posts/2024-07-14-bloom-filter/index.md    | 26 +++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 content/posts/2024-07-14-bloom-filter/index.md

diff --git a/content/posts/2024-07-14-bloom-filter/index.md b/content/posts/2024-07-14-bloom-filter/index.md
new file mode 100644
index 0000000..98cfc1e
--- /dev/null
+++ b/content/posts/2024-07-14-bloom-filter/index.md
@@ -0,0 +1,26 @@
+---
+title: "Bloom Filter"
+date: 2024-07-14T17:46:40+01:00
+draft: false # I don't care for draft mode, git has branches for that
+description: "Probably cool"
+tags:
+  - algorithms
+  - data structures
+  - python
+categories:
+  - programming
+series:
+- Cool algorithms
+favorite: false
+disable_feed: false
+---
+
+The [_Bloom Filter_][wiki] is a probabilistic data structure for set membership.
+
+The filter can be used as an inexpensive first step when querying the actual
+data is quite costly (e.g: as a first check for expensive cache lookups or large
+data seeks).
+
+[wiki]: https://en.wikipedia.org/wiki/Bloom_filter
+
+<!--more-->

From 0084c8717a062f4fb70b033c411871f18276ea79 Mon Sep 17 00:00:00 2001
From: Bruno BELANYI <bruno@belanyi.fr>
Date: Sat, 6 Jul 2024 23:36:20 +0100
Subject: [PATCH 07/16] posts: gap-buffer: add growth

---
 content/posts/2024-07-06-gap-buffer/index.md | 22 ++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/content/posts/2024-07-06-gap-buffer/index.md b/content/posts/2024-07-06-gap-buffer/index.md
index a90e8a4..ace8fd9 100644
--- a/content/posts/2024-07-06-gap-buffer/index.md
+++ b/content/posts/2024-07-06-gap-buffer/index.md
@@ -99,3 +99,25 @@ def prefix_length(self) -> int:
 def suffix_length(self) -> int:
   return self.capacity - self._gap_end
 ```
+
+### Growing the buffer
+
+I've written this method in a somewhat non-idiomatic manner, to make it closer
+to how it would look in C using `realloc` instead.
+
+It would be more efficient to use slicing to insert the needed extra capacity
+directly, instead of making a new buffer and copying characters over.
+
+```python
+def grow(self, capacity: int) -> None:
+    assert capacity >= self.capacity
+    # Create a new buffer with the new capacity
+    new_buf = [""] * capacity
+    # Move the prefix/suffix to their place in the new buffer
+    added_capacity = capacity - len(self._buf)
+    new_buf[: self._gap_start] = self._buf[: self._gap_start]
+    new_buf[self._gap_end + added_capacity :] = self._buf[self._gap_end :]
+    # Use the new buffer, account for added capacity
+    self._buf = new_buf
+    self._gap_end += added_capacity
+```

From 3992996a89dc3183c2563939cbd8de2a941cd393 Mon Sep 17 00:00:00 2001
From: Bruno BELANYI <bruno@belanyi.fr>
Date: Sun, 14 Jul 2024 17:54:59 +0100
Subject: [PATCH 08/16] posts: bloom-filter: add presentation

---
 content/posts/2024-07-14-bloom-filter/index.md | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/content/posts/2024-07-14-bloom-filter/index.md b/content/posts/2024-07-14-bloom-filter/index.md
index 98cfc1e..0a82882 100644
--- a/content/posts/2024-07-14-bloom-filter/index.md
+++ b/content/posts/2024-07-14-bloom-filter/index.md
@@ -24,3 +24,16 @@ data seeks).
 [wiki]: https://en.wikipedia.org/wiki/Bloom_filter
 
 <!--more-->
+
+## What does it do?
+
+A _Bloom Filter_ can be understood as a hash-set which can either tell you:
+
+* An element is _not_ part of the set.
+* An element _may be_ part of the set.
+
+More specifically, one can tweak the parameters of the filter to make it so that
+the _false positive_ rate of membership is quite low.
+
+I won't be going into those calculations here, but they are quite trivial to
+compute, or one can just look up appropriate values for their use case.

From 72057a3224cf0f537d66e698ab9db37bfd7d5044 Mon Sep 17 00:00:00 2001
From: Bruno BELANYI <bruno@belanyi.fr>
Date: Sat, 6 Jul 2024 23:36:33 +0100
Subject: [PATCH 09/16] posts: gap-buffer: add insertion

---
 content/posts/2024-07-06-gap-buffer/index.md | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/content/posts/2024-07-06-gap-buffer/index.md b/content/posts/2024-07-06-gap-buffer/index.md
index ace8fd9..b23f21a 100644
--- a/content/posts/2024-07-06-gap-buffer/index.md
+++ b/content/posts/2024-07-06-gap-buffer/index.md
@@ -121,3 +121,22 @@ def grow(self, capacity: int) -> None:
     self._buf = new_buf
     self._gap_end += added_capacity
 ```
+
+### Insertion
+
+Inserting text at the cursor's position means filling up the gap in the middle
+of the buffer. To do so we must first make sure that the gap is big enough, or
+grow the buffer accordingly.
+
+Then inserting the text is simply a matter of copying its characters in place,
+and moving the start of the gap further right.
+
+```python
+def insert(self, val: str) -> None:
+    # Ensure we have enough space to insert the whole string
+    if len(val) > self.gap_length:
+        self.grow(max(self.capacity * 2, self.string_length + len(val)))
+    # Fill the gap with the given string
+    self._buf[self._gap_start : self._gap_start + len(val)] = val
+    self._gap_start += len(val)
+```

From 1d37e00b3a9afe578ef4441e0b49ac375dc62a03 Mon Sep 17 00:00:00 2001
From: Bruno BELANYI <bruno@belanyi.fr>
Date: Sun, 30 Jun 2024 12:37:48 +0100
Subject: [PATCH 10/16] posts: trie: add removal

---
 content/posts/2024-06-30-trie/index.md | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/content/posts/2024-06-30-trie/index.md b/content/posts/2024-06-30-trie/index.md
index b8e4679..2a0d77e 100644
--- a/content/posts/2024-06-30-trie/index.md
+++ b/content/posts/2024-06-30-trie/index.md
@@ -97,3 +97,20 @@ def insert(self, key: str, value: T) -> bool:
       # Otherwise, recurse on the child corresponding to the first letter
       return self._children[key[0]].insert(key[1:], value)
 ```
+
+### Removal
+
+Removal should also look familiar.
+
+```python
+def remove(self, key: str) -> bool:
+    # Have we matched the full key?
+    if not key:
+        was_mapped = self._value is not None
+        # Remove the value
+        self._value = None
+        # Return whether it was mapped
+        return was_mapped
+    # Otherwise, recurse on the child corresponding to the first letter
+    return self._children[key[0]].remove(key[1:])
+```

From 798116716f528a5a439d1bc490ec1a955d548e04 Mon Sep 17 00:00:00 2001
From: Bruno BELANYI <bruno@belanyi.fr>
Date: Sun, 14 Jul 2024 17:55:15 +0100
Subject: [PATCH 11/16] posts: bloom-filter: add construction

---
 .../posts/2024-07-14-bloom-filter/index.md    | 25 +++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/content/posts/2024-07-14-bloom-filter/index.md b/content/posts/2024-07-14-bloom-filter/index.md
index 0a82882..547d50f 100644
--- a/content/posts/2024-07-14-bloom-filter/index.md
+++ b/content/posts/2024-07-14-bloom-filter/index.md
@@ -37,3 +37,28 @@ the _false positive_ rate of membership is quite low.
 
 I won't be going into those calculations here, but they are quite trivial to
 compute, or one can just look up appropriate values for their use case.
+
+## Implementation
+
+I'll be using Python, which has the nifty ability of representing bitsets
+through its built-in big integers quite easily.
+
+We'll be assuming a `BIT_COUNT` of 64 here, but the implementation can easily be
+tweaked to use a different number, or even change it at construction time.
+
+### Representation
+
+A `BloomFilter` is just a set of bits and a list of hash functions.
+
+```python
+BIT_COUNT = 64
+
+class BloomFilter[T]:
+    _bits: int
+    _hash_functions: list[Callable[[T], int]]
+
+    def __init__(self, hash_functions: list[Callable[[T], int]]) -> None:
+        # Filter is initially empty
+        self._bits = 0
+        self._hash_functions = hash_functions
+```

From e05ed1cc4aac43a8c266bbff31a3919b672225e9 Mon Sep 17 00:00:00 2001
From: Bruno BELANYI <bruno@belanyi.fr>
Date: Sat, 6 Jul 2024 23:36:46 +0100
Subject: [PATCH 12/16] posts: gap-buffer: add deletion

---
 content/posts/2024-07-06-gap-buffer/index.md | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/content/posts/2024-07-06-gap-buffer/index.md b/content/posts/2024-07-06-gap-buffer/index.md
index b23f21a..9ca44ea 100644
--- a/content/posts/2024-07-06-gap-buffer/index.md
+++ b/content/posts/2024-07-06-gap-buffer/index.md
@@ -140,3 +140,22 @@ def insert(self, val: str) -> None:
     self._buf[self._gap_start : self._gap_start + len(val)] = val
     self._gap_start += len(val)
 ```
+
+### Deletion
+
+Removing text from the buffer simply expands the gap in the corresponding
+direction, shortening the string's prefix/suffix. This makes it very cheap.
+
+The methods are named after the `backspace` and `delete` keys on the keyboard.
+
+```python
+def backspace(self, dist: int = 1) -> None:
+    assert dist <= self.prefix_length
+    # Extend gap to the left
+    self._gap_start -= dist
+
+def delete(self, dist: int = 1) -> None:
+    assert dist <= self.suffix_length
+    # Extend gap to the right
+    self._gap_end += dist
+```

From a0e20dd341261cb66ad231bf6cddf92e35b96f24 Mon Sep 17 00:00:00 2001
From: Bruno BELANYI <bruno@belanyi.fr>
Date: Sun, 30 Jun 2024 12:38:01 +0100
Subject: [PATCH 13/16] posts: trie: add fuzzy matching

---
 content/posts/2024-06-30-trie/index.md | 55 ++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/content/posts/2024-06-30-trie/index.md b/content/posts/2024-06-30-trie/index.md
index 2a0d77e..aef49e3 100644
--- a/content/posts/2024-06-30-trie/index.md
+++ b/content/posts/2024-06-30-trie/index.md
@@ -114,3 +114,58 @@ def remove(self, key: str) -> bool:
     # Otherwise, recurse on the child corresponding to the first letter
     return self._children[key[0]].remove(key[1:])
 ```
+
+### Fuzzy matching
+
+Fuzzily matching a given word is where the real difficulty is: the key is to
+realize we can use the prefix-tree nature of a _Trie_ to avoid doing wasteful
+work.
+
+By leveraging the prefix visit order of the tree, we can build an iterative
+Levenshtein distance matrix, in much the same way one would do so in its
+[Dynamic Programming] implementation (see the [Wagner-Fischer algorithm]).
+
+[Dynamic Programming]: https://en.wikipedia.org/wiki/Dynamic_programming
+[Wagner-Fischer algorithm]: https://en.wikipedia.org/wiki/Wagner%E2%80%93Fischer_algorithm
+
+```python
+class FuzzyResult[T](NamedTuple):
+    distance: int
+    key: str
+    value: T
+
+
+def get_fuzzy(self, key: str, max_distance: int = 0) -> Iterator[FuzzyResult[T]]:
+    def helper(
+        current_word: str,
+        node: Trie[T],
+        previous_row: list[int],
+    ) -> Iterator[FuzzyResult[T]]:
+        # Iterative Levenshtein
+        current_row = [previous_row[0] + 1]
+        current_char = current_word[-1]
+        for column, key_char in enumerate(key, start=1):
+            insertion = current_row[column - 1] + 1
+            deletion = previous_row[column] + 1
+            replacement = previous_row[column - 1] + (key_char != current_char)
+            current_row.append(min(insertion, deletion, replacement))
+
+        # If we are under the max distance, match this node
+        if (distance := current_row[-1]) <= max_distance and node._value is not None:
+            # Only if it has a value of course
+            yield FuzzyResult(distance, current_word, node._value)
+
+        # If we can potentially still match children, recurse
+        if min(current_row) <= max_distance:
+            for c, child in node._children.items():
+                yield from helper(current_word + c, child, current_row)
+
+    # Build the first row -- the edit distance from the empty string
+    row = list(range(len(key) + 1))
+
+    # Base case for the empty string
+    if (distance := row[-1]) <= max_distance and self._value is not None:
+        yield FuzzyResult(distance, "", self._value)
+    for c, child in self._children.items():
+        yield from helper(c, child, row)
+```

From 2c31c1aff294231f18f0d2df9a96e4c9878ae5ee Mon Sep 17 00:00:00 2001
From: Bruno BELANYI <bruno@belanyi.fr>
Date: Sun, 14 Jul 2024 17:55:33 +0100
Subject: [PATCH 14/16] posts: bloom-filter: add insertion

---
 content/posts/2024-07-14-bloom-filter/index.md | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/content/posts/2024-07-14-bloom-filter/index.md b/content/posts/2024-07-14-bloom-filter/index.md
index 547d50f..1d593a7 100644
--- a/content/posts/2024-07-14-bloom-filter/index.md
+++ b/content/posts/2024-07-14-bloom-filter/index.md
@@ -62,3 +62,18 @@ class BloomFilter[T]:
         self._bits = 0
         self._hash_functions = hash_functions
 ```
+
+### Inserting a key
+
+To add an element to the filter, we take the output from each hash function and
+use that to set a bit in the filter. This combination of bits will identify the
+element, which we can use for lookup later.
+
+```python
+def insert(self, val: T) -> None:
+    # Iterate over each hash
+    for f in self._hash_functions:
+        n = f(val) % BIT_COUNT
+        # Set the corresponding bit
+        self._bit |= 1 << n
+```

From d1a67510ef975d54b70d18ba368e9d7b37709874 Mon Sep 17 00:00:00 2001
From: Bruno BELANYI <bruno@belanyi.fr>
Date: Sat, 6 Jul 2024 23:41:31 +0100
Subject: [PATCH 15/16] posts: gap-buffer: add movement

---
 content/posts/2024-07-06-gap-buffer/index.md | 30 ++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/content/posts/2024-07-06-gap-buffer/index.md b/content/posts/2024-07-06-gap-buffer/index.md
index 9ca44ea..763628d 100644
--- a/content/posts/2024-07-06-gap-buffer/index.md
+++ b/content/posts/2024-07-06-gap-buffer/index.md
@@ -159,3 +159,33 @@ def delete(self, dist: int = 1) -> None:
     # Extend gap to the right
     self._gap_end += dist
 ```
+
+### Moving the cursor
+
+Moving the cursor along the buffer will shift letters from one side of the gap
+to the other, moving them across from prefix to suffix and back.
+
+I find Python's list slicing not quite as elegant to read as a `memmove`, though
+it does make for a very small and efficient implementation.
+
+```python
+def left(self, dist: int = 1) -> None:
+    assert dist <= self.prefix_length
+    # Shift the needed number of characters from end of prefix to start of suffix
+    self._buf[self._gap_end - dist : self._gap_end] = self._buf[
+        self._gap_start - dist : self._gap_start
+    ]
+    # Adjust indices accordingly
+    self._gap_start -= dist
+    self._gap_end -= dist
+
+def right(self, dist: int = 1) -> None:
+    assert dist <= self.suffix_length
+    # Shift the needed number of characters from start of suffix to end of prefix
+    self._buf[self._gap_start : self._gap_start + dist] = self._buf[
+        self._gap_end : self._gap_end + dist
+    ]
+    # Adjust indices accordingly
+    self._gap_start += dist
+    self._gap_end += dist
+```

From 27152689eaae20208cd390e980255d66b09bd0f3 Mon Sep 17 00:00:00 2001
From: Bruno BELANYI <bruno@belanyi.fr>
Date: Sun, 14 Jul 2024 17:56:33 +0100
Subject: [PATCH 16/16] posts: bloom-filter: add lookup

---
 content/posts/2024-07-14-bloom-filter/index.md | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/content/posts/2024-07-14-bloom-filter/index.md b/content/posts/2024-07-14-bloom-filter/index.md
index 1d593a7..93107d4 100644
--- a/content/posts/2024-07-14-bloom-filter/index.md
+++ b/content/posts/2024-07-14-bloom-filter/index.md
@@ -77,3 +77,21 @@ def insert(self, val: T) -> None:
         # Set the corresponding bit
         self._bit |= 1 << n
 ```
+
+### Querying a key
+
+Because the _Bloom Filter_ does not actually store its elements, but some
+derived data from hashing them, it can only definitely say if an element _does
+not_ belong to it. Otherwise, it _may_ be part of the set, and should be checked
+against the actual underlying store.
+
+```python
+def may_contain(self, val: T) -> bool:
+    for f in self._hash_functions:
+        n = f(val) % BIT_COUNT
+        # If one of the bits is unset, the value is definitely not present
+        if not (self._bits & (1 << n)):
+            return False
+    # All bits were matched, `val` is likely to be part of the set
+    return True
+```