From 8efc7f269c2c273148bdd2627c8e36c3ff16e3c1 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Fri, 29 Oct 2021 13:11:17 +0200 Subject: [PATCH 001/107] themes: anubis: bump submodule --- themes/anubis | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/themes/anubis b/themes/anubis index 5dab60e..fd6a2df 160000 --- a/themes/anubis +++ b/themes/anubis @@ -1 +1 @@ -Subproject commit 5dab60e04a37896c09a32137aefe821c63b3af04 +Subproject commit fd6a2df13601606e7f3e270ca810c278bba028c1 From 5a92364ae48eedaa6094a1f7b307946c5768a9fc Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sat, 18 Dec 2021 18:00:56 +0100 Subject: [PATCH 002/107] themes: anubis: bump submodule --- themes/anubis | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/themes/anubis b/themes/anubis index fd6a2df..10c340b 160000 --- a/themes/anubis +++ b/themes/anubis @@ -1 +1 @@ -Subproject commit fd6a2df13601606e7f3e270ca810c278bba028c1 +Subproject commit 10c340b0112b677acc5c8e60b6d9b7b6b4c91334 From 30b47b9103a84ca5435e31a14707e2fd910210b5 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Mon, 17 Jan 2022 11:37:45 +0100 Subject: [PATCH 003/107] content: about: update current work situation --- content/about.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/content/about.md b/content/about.md index 12b578b..86d96b3 100644 --- a/content/about.md +++ b/content/about.md @@ -4,7 +4,9 @@ description: "About me" date: 2020-07-14 --- -I'm a CS student at EPITA. +I'm currently working as a backend engineer at [DGEX +Solutions](https://www.linkedin.com/company/dgex-solutions/), an offshoot of +[SNCF Réseau](https://www.sncf-reseau.com/), the French national railway entity. You can find my CV [here](https://cv.belanyi.fr/en.pdf). Or [here](https://cv.belanyi.fr/fr.pdf) for the french version. 
From 3e3cc7104358dbd3f8795b9139085f080c841263 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Mon, 20 Jun 2022 14:46:03 +0200 Subject: [PATCH 004/107] ci: migrate to drone-rsync --- .drone.jsonnet | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/.drone.jsonnet b/.drone.jsonnet index a5059e2..79e02bc 100644 --- a/.drone.jsonnet +++ b/.drone.jsonnet @@ -29,17 +29,16 @@ local Pipeline(isDev) = { { name: "deploy", commands: [ - "nix run github:ambroisie/nix-config#drone-scp", + "nix run github:ambroisie/nix-config#drone-rsync", ], environment: { - SCP_SOURCE: "public/*", - TAR_STRIP_COMPONENTS: 1, # Remove 'public/' suffix from file paths - SCP_RM: true, # Remove previous files from target directory - SCP_HOST: { from_secret: "ssh_host" }, - SCP_TARGET: { from_secret: "ssh_target" + if isDev then "_dev" else "" }, - SCP_USERNAME: { from_secret: "ssh_user" }, - SCP_KEY: { from_secret: "ssh_key" }, - SCP_PORT: { from_secret: "ssh_port" }, + # Trailing slash to synchronize the folder's *content* to the target + SYNC_SOURCE: "public/", + SYNC_HOST: { from_secret: "ssh_host" }, + SYNC_TARGET: { from_secret: "ssh_target" + if isDev then "_dev" else "" }, + SYNC_USERNAME: { from_secret: "ssh_user" }, + SYNC_KEY: { from_secret: "ssh_key" }, + SYNC_PORT: { from_secret: "ssh_port" }, }, }, { From 588d2b3f216048cb9a8f1d12de0712094a5460d2 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Mon, 20 Jun 2022 14:46:37 +0200 Subject: [PATCH 005/107] nix: remove deprecated flake attributes --- flake.nix | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/flake.nix b/flake.nix index be1350a..fa11cb8 100644 --- a/flake.nix +++ b/flake.nix @@ -61,15 +61,17 @@ }; }; - devShell = pkgs.mkShell { - name = "blog"; + devShells = { + default = pkgs.mkShell { + name = "blog"; - buildInputs = with pkgs; [ - gnumake - hugo - ]; + buildInputs = with pkgs; [ + gnumake + hugo + ]; - inherit (self.checks.${system}.pre-commit) 
shellHook; + inherit (self.checks.${system}.pre-commit) shellHook; + }; }; } ); From 0cb79dc69e0e41d096f23d30617ef247ad293e28 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Mon, 20 Jun 2022 14:49:56 +0200 Subject: [PATCH 006/107] posts: add mutiple-dispatch-in-c++ --- .../index.md | 246 ++++++++++++++++++ 1 file changed, 246 insertions(+) create mode 100644 content/posts/2022-11-02-multiple-dispatch-in-c++/index.md diff --git a/content/posts/2022-11-02-multiple-dispatch-in-c++/index.md b/content/posts/2022-11-02-multiple-dispatch-in-c++/index.md new file mode 100644 index 0000000..e9eb21b --- /dev/null +++ b/content/posts/2022-11-02-multiple-dispatch-in-c++/index.md @@ -0,0 +1,246 @@ +--- +title: "Multiple Dispatch in C++" +date: 2022-11-02T16:36:53+01:00 +draft: false # I don't care for draft mode, git has branches for that +description: "A Lisp super-power in C++" +tags: + - c++ + - design-pattern +categories: + - programming +series: +favorite: false +disable_feed: false +--- + +A great feature that can be used in more dynamic languages is *multiple +dispatch*. Here's an example in [Julia][julia-lang] taken from the [Wikipedia +article][wiki-multiple-dispatch]. + +```julia +abstract type SpaceObject end + +struct Asteroid <: SpaceObject + # Asteroid fields +end +struct Spaceship <: SpaceObject + # Spaceship fields +end + +collide_with(::Asteroid, ::Spaceship) = # Asteroid/Spaceship collision +collide_with(::Spaceship, ::Asteroid) = # Spaceship/Asteroid collision +collide_with(::Spaceship, ::Spaceship) = # Spaceship/Spaceship collision +collide_with(::Asteroid, ::Asteroid) = # Asteroid/Asteroid collision + +collide(x::SpaceObject, y::SpaceObject) = collide_with(x, y) +``` + +The `collide` function calls `collide_with` which, at runtime, will inspect the +types of its arguments and *dispatch* to the appropriate implementation. + +Julia was created with multiple dispatch as a first-class citizen, it is used +liberally in its ecosystem. 
C++ does not have access to such a feature natively, +but there are alternatives that I will be presenting in this article, and try to +justify there uses and limitations. + +[julia-lang]: https://julialang.org/ +[wiki-multiple-dispatch]: https://en.wikipedia.org/wiki/Multiple_dispatch + + +## Single dispatch + +The native way to perform dynamic dispatch in C++ is through the +use of *virtual methods*, which allows an object to *override* the behaviour of +one of its super-classes' method. + +Invoking a virtual method will perform *single dispatch*, on the dynamic type +of the object who's method is being called. + +Here is an example: + +```cpp +struct SpaceObject { + virtual ~SpaceObject() = default; + + // Pure virtual method, which must be overridden by non-abstract sub-classes + virtual void impact() = 0; +}; + +struct Asteroid : SpaceObject { + // Override the method for asteroid impacts + void impact() override { + std::cout << "Bang!\n"; + } +}; + +struct Spaceship : SpaceObject { + // Override the method for spaceship impacts + void impact() override { + std::cout << "Crash!\n"; + } +}; + +int main() { + std::unique_ptr object = std::make_unique(); + object->impact(); // Prints "Crash!" + + object = std::make_unique(); + object->impact(); // Prints "Bang!" +} +``` + +Virtual methods are great when you want to represent a common set of behaviour +(an *interface*), and be able to substitute various types with their specific +implementation. + +For example, a dummy file-system interface might look like the following: + +```cpp +struct Filesystem { + virtual void write(std::string_view filename, std::span data) = 0; + virtual std::vector read(std::string_view filename) = 0; + virtual void delete(std::string_view filename) = 0; +}; +``` + +You can then write `PosixFilesystem` which makes use of the POSIX API and +interact with actual on-disk data, `MockFilesystem` which only works in-memory +and can be used for testing, etc... 
+ +## Double dispatch through the Visitor pattern + +Sometimes single dispatch is not enough, such as in the collision example at the +beginning of this article. In cases where a computation depends on the dynamic +type of *two* of its values, we can make use of double-dispatch by leveraging +the Visitor design pattern. This is done by calling a virtual method on the +first value, which itself will call a virtual method on the second value. + +Here's a commentated example: + +```cpp +struct Asteroid; +struct Spaceship; + +struct SpaceObject { + virtual ~SpaceObject() = default; + + // Only used to kick-start the double-dispatch process + virtual void collide_with(SpaceObject& other) = 0; + + // The actual dispatching methods + virtual void collide_with(Asteroid& other) = 0; + virtual void collide_with(Spaceship& other) = 0; +}; + +struct Asteroid : SpaceObject { + void collide_with(SpaceObject& other) override { + // `*this` is an `Asteroid&` which kick-starts the double-dispatch + other.collide_with(*this); + }; + + void collide_with(Asteroid& other) override { /* Asteroid/Asteroid */ }; + void collide_with(Spaceship& other) override { /* Asteroid/Spaceship */ }; +}; + +struct Spaceship : SpaceObject { + void collide_with(SpaceObject& other) override { + // `*this` is a `Spaceship&` which kick-starts the double-dispatch + other.collide_with(*this); + }; + + void collide_with(Asteroid& other) override { /* Spaceship/Asteroid */ }; + void collide_with(Spaceship& other) override { /* Spaceship/Spaceship */ }; +}; + +void collide(SpaceObject& first, SpaceObject& second) { + first.collide_with(second); +}; + +int main() { + auto asteroid = std::make_unique(); + auto spaceship = std::make_unique(); + + collide(*asteroid, *spaceship); + // Calls in order: + // - Asteroid::collide_with(SpaceObject&) + // - Spaceship::collide_with(Asteroid&) + + collide(*spaceship, *asteroid); + // Calls in order: + // - Spaceship::collide_with(SpaceObject&) + // - 
Asteroid::collide_with(Spaceship&) + + asteroid->collide_with(*spaceship); + // Only calls Asteroid::collide_with(Spaceship&) + + spaceship->collide_with(*asteroid); + // Only calls Spaceship::collide_with(Asteroid&) +} +``` + +Double dispatch is pattern is most commonly used with the *visitor pattern*, in +which a closed class hierarchy (the data) is separated from an open class +hierarchy (the algorithms acting on that data). This is especially useful in +e.g: compilers, where the AST class hierarchy represents the data *only*, and +all compiler stages and optimization passes are programmed by a series of +visitors. + +## Multiple dispatch on a closed class hierarchy + +When even double dispatch is not enough, there is a way to do multiple dispatch +in standard C++, included in the STL since C++17. However unlike the previous +methods I showed, this one relies on using [`std::variant`][variant-cppref] and +[`std::visit`][visit-cppref]. + +[variant-cppref]: https://en.cppreference.com/w/cpp/utility/variant +[visit-cppref]: https://en.cppreference.com/w/cpp/utility/variant/visit + +The limitation of `std::variant` is that you are limited to the types you can +select at *compile-time* for the values used during your dispatch operation. +You have a *closed* hierarchy of classes, which is the explicit list of types in +your `variant`. + +Nonetheless, if you can live with that limitation, then you have a great amount +of power available to you. I have used `std::visit` in the past to mimic the +effect of pattern matching. 
+ +In this example, I re-create the double-dispatch from the previous section: + +```cpp +// No need to inherit from a `SpaceObject` base class +struct Asteroid {}; +struct Spaceship {}; + +// But the list of possible runtime *must* be enumerated at compile-time +using SpaceObject = std::variant; + +void collide(SpaceObject& first, SpaceObject& second) { + struct CollideDispatch { + void operator()(Asteroid& first, Asteroid& second) { + // Asteroid/Asteroid + } + void operator()(Asteroid& first, Spaceship& second) { + // Asteroid/Spaceship + } + void operator()(Spaceship& first, Asteroid& second) { + // Spaceship/Asteroid + } + void operator()(Spaceship& first, Spaceship& second) { + // Spaceship/Spaceship + } + }; + + std::visit(CollideDispatch(), first, second); +} + +int main() { + SpaceObject asteroid = Asteroid(); + SpaceObject spaceship = Spaceship(); + + collide(asteroid, spaceship); + // Calls CollideDispatch::operator()(Asteroid&, Spaceship&) + + collide(spaceship, asteroid); + // Calls CollideDispatch::operator()(Spaceship&, Asteroid&) +} +``` From bb48eaea3418a1b819b7f187039a3563a42e5ba9 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Wed, 2 Nov 2022 16:03:25 +0100 Subject: [PATCH 007/107] posts: multiple-dispatch: add visitor downside --- .../posts/2022-11-02-multiple-dispatch-in-c++/index.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/content/posts/2022-11-02-multiple-dispatch-in-c++/index.md b/content/posts/2022-11-02-multiple-dispatch-in-c++/index.md index e9eb21b..928f5f9 100644 --- a/content/posts/2022-11-02-multiple-dispatch-in-c++/index.md +++ b/content/posts/2022-11-02-multiple-dispatch-in-c++/index.md @@ -185,6 +185,16 @@ e.g: compilers, where the AST class hierarchy represents the data *only*, and all compiler stages and optimization passes are programmed by a series of visitors. 
+One downside of this approach is that if you want to add `SpaceStation` as +a sub-class of `SpaceObject`, and handle its collisions with other +`SpaceObject`s, you need to: + +* Implement all `collide_with` methods for this new class. +* Add a new virtual method `collide_with(SpaceStation&)` and implement it on + every sub-class. + +This can be inconvenient if your class hierarchy changes often. + ## Multiple dispatch on a closed class hierarchy When even double dispatch is not enough, there is a way to do multiple dispatch From 7c6db4c19eae57a8feedbd57c9d41f460a4c17e5 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Wed, 2 Nov 2022 16:06:51 +0100 Subject: [PATCH 008/107] posts: multiple-dispatch: add visit downside --- content/posts/2022-11-02-multiple-dispatch-in-c++/index.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/content/posts/2022-11-02-multiple-dispatch-in-c++/index.md b/content/posts/2022-11-02-multiple-dispatch-in-c++/index.md index 928f5f9..997faec 100644 --- a/content/posts/2022-11-02-multiple-dispatch-in-c++/index.md +++ b/content/posts/2022-11-02-multiple-dispatch-in-c++/index.md @@ -254,3 +254,7 @@ int main() { // Calls CollideDispatch::operator()(Spaceship&, Asteroid&) } ``` + +Obviously, the issue with adding a new `SpaceStation` variant is once again +apparent in this implementation. You will get a compile error unless you handle +this new `SpaceStation` variant at every point you `visit` the `SpaceObject`s. 
From 245d7d123d34555356005202a43a7e532c38a3fb Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Wed, 2 Nov 2022 16:31:43 +0100 Subject: [PATCH 009/107] posts: multiple-dispatch: add expression problem --- .../index.md | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/content/posts/2022-11-02-multiple-dispatch-in-c++/index.md b/content/posts/2022-11-02-multiple-dispatch-in-c++/index.md index 997faec..f179561 100644 --- a/content/posts/2022-11-02-multiple-dispatch-in-c++/index.md +++ b/content/posts/2022-11-02-multiple-dispatch-in-c++/index.md @@ -258,3 +258,35 @@ int main() { Obviously, the issue with adding a new `SpaceStation` variant is once again apparent in this implementation. You will get a compile error unless you handle this new `SpaceStation` variant at every point you `visit` the `SpaceObject`s. + +## The Expression Problem + +One issue we have not been able to move past in these exemples is the +[Expression Problem][expression-problem]. In two words, this means that we can't +add a new data type (e.g: `SpaceStation`), or a new operation (e.g: `land_on`) +to our current code without re-compiling it. + +[expression-problem]: https://en.wikipedia.org/wiki/Expression_problem + +This is the downside I was pointing out in our previous sections: + +* Data type extension: one can easily add a new `SpaceObject` child-class in the + OOP version, but needs to modify each implementation if we want to add a new + method to the `SpaceObject` interface to implement a new operation. +* Operation extension: one can easily create a new function when using the + `std::variant` based representation, as pattern-matching easily allows us to + only handle the kinds of values we are interested in. But adding a new + `SpaceObject` variant means we need to modify and re-compile every + `std::visit` call to handle the new variant. + +There is currently no (good) way in standard C++ to tackle the Expression +Problem. 
A paper ([N2216][N2216]) was written to propose a new language feature +to improve the situation. However it looks quite complex, and never got followed +up on for standardization. + +[N2216]: https://open-std.org/jtc1/sc22/wg21/docs/papers/2007/n2216.pdf + +In the meantime, one can find some libraries (like [`yomm2`][yomm2]) that +reduce the amount of boiler-plate needed to emulate this feature. + +[yomm2]: https://github.com/jll63/yomm2 From bd5f94746956a69fe6b835a4b34c92c5c5c8e426 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Thu, 3 Nov 2022 11:22:24 +0100 Subject: [PATCH 010/107] posts: multiple-dispatch: add 'yomm2' example --- .../index.md | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/content/posts/2022-11-02-multiple-dispatch-in-c++/index.md b/content/posts/2022-11-02-multiple-dispatch-in-c++/index.md index f179561..838de49 100644 --- a/content/posts/2022-11-02-multiple-dispatch-in-c++/index.md +++ b/content/posts/2022-11-02-multiple-dispatch-in-c++/index.md @@ -290,3 +290,40 @@ In the meantime, one can find some libraries (like [`yomm2`][yomm2]) that reduce the amount of boiler-plate needed to emulate this feature. [yomm2]: https://github.com/jll63/yomm2 + +```cpp +#include + +struct SpaceObject { + virtual ~SpaceObject() = default; +}; + +struct Asteroid : SpaceObject { /* fields, methods, etc... */ }; + +struct Spaceship : SpaceObject { /* fields, methods, etc... 
*/ }; + +// Register all sub-classes of `SpaceObject` for use with open methods +register_classes(SpaceObject, Asteroid, Spaceship); + +// Register the `collide` open method, which dispatches on two arguments +declare_method(void, collide, (virtual_, virtual_)); + +// Write the different implementations of `collide` +define_method(void, collide, (Asteroid& left, Asteroid& right)) { /* work */ } +define_method(void, collide, (Asteroid& left, Spaceship& right)) { /* work */ } +define_method(void, collide, (Spaceship& left, Asteroid& right)) { /* work */ } +define_method(void, collide, (Spaceship& left, Spaceship& right)) { /* work */ } + + +int main() { + yorel::yomm2::update_methods(); + + auto asteroid = std::make_unique(); + auto spaceship = std::make_unique(); + + collide(*asteroid, *spaceship); // Calls (Asteroid, Spaceship) version + collide(*spaceship, *asteroid); // Calls (Spaceship, Asteroid) version + collide(*asteroid, *asteroid); // Calls (Asteroid, Asteroid) version + collide(*spaceship, *spaceship); // Calls (Spaceship, Spaceship) version +} +``` From a1746dd70c7c83bda29e3918b6056194876a97ed Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Mon, 21 Nov 2022 12:14:52 +0100 Subject: [PATCH 011/107] nix: bump flake inputs --- flake.lock | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/flake.lock b/flake.lock index f4850aa..4d279e9 100644 --- a/flake.lock +++ b/flake.lock @@ -2,11 +2,11 @@ "nodes": { "futils": { "locked": { - "lastModified": 1622445595, - "narHash": "sha256-m+JRe6Wc5OZ/mKw2bB3+Tl0ZbtyxxxfnAWln8Q5qs+Y=", + "lastModified": 1667395993, + "narHash": "sha256-nuEHfE/LcWyuSWnS8t12N1wc105Qtau+/OdUAjtQ0rA=", "owner": "numtide", "repo": "flake-utils", - "rev": "7d706970d94bc5559077eb1a6600afddcd25a7c8", + "rev": "5aed5285a952e0b949eb3ba02c12fa4fcfef535f", "type": "github" }, "original": { @@ -18,11 +18,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1628320020, - "narHash": 
"sha256-4xBEb+TOHyIGpK37EVsZx6dGPwNMf5YWNBJaQ4VyZws=", + "lastModified": 1669001258, + "narHash": "sha256-fi0hCUCalMwd+RUi6zUBzNb0ShowaJk+o+p8qtF/Iv4=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "67c80531be622641b5b2ccc3a7aff355cb02476b", + "rev": "c6e5939c8fa2ab2230baf1378a34746e8db1aed7", "type": "github" }, "original": { @@ -42,11 +42,11 @@ ] }, "locked": { - "lastModified": 1621411868, - "narHash": "sha256-R+7OQ2JYFCb3E7Jl7LhRifzMVCR6Gl8R98zYsNhZtJ8=", + "lastModified": 1669018323, + "narHash": "sha256-/2Ixw4v5JbbhH+sE6huvyG+txhBGIcx5iWIZ4kWtilU=", "owner": "cachix", "repo": "pre-commit-hooks.nix", - "rev": "2e7fac06108b4fc81f5ff9ed9a02bc4f6ede7001", + "rev": "46fb5634676994bd333a94c8bd322eb1854ff223", "type": "github" }, "original": { From e737a81e83c79ce3ecbd8c4605dce99ef1039255 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Mon, 21 Nov 2022 12:37:04 +0100 Subject: [PATCH 012/107] content: wish-lists: add iFixit toolkit --- content/wish-lists.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/content/wish-lists.md b/content/wish-lists.md index 0a65fa9..703168f 100644 --- a/content/wish-lists.md +++ b/content/wish-lists.md @@ -14,3 +14,5 @@ A few of my Amazon wish lists in case you want to give me a gift. 
* [Cooking & kitchenware](https://www.amazon.fr/hz/wishlist/ls/2MNRCLPNABZSU) * [Data Structures & Algorithms](https://www.amazon.fr/hz/wishlist/ls/2XZPQSBOGOFC3) * [Programming](https://www.amazon.fr/hz/wishlist/ls/1R4KFV4H2D8IF) +* Other items: + * [iFixit Pro Tech Toolit](https://store.ifixit.fr/products/pro-tech-toolkit) From 87664132b175069b2e17264593fe70c4a726785b Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Mon, 26 Dec 2022 18:11:46 +0100 Subject: [PATCH 013/107] nix: bump flake inputs --- flake.lock | 54 ++++++++++++++++++++++++++++++++++++++++++++++++------ flake.nix | 1 + 2 files changed, 49 insertions(+), 6 deletions(-) diff --git a/flake.lock b/flake.lock index 4d279e9..840b160 100644 --- a/flake.lock +++ b/flake.lock @@ -1,5 +1,21 @@ { "nodes": { + "flake-compat": { + "flake": false, + "locked": { + "lastModified": 1668681692, + "narHash": "sha256-Ht91NGdewz8IQLtWZ9LCeNXMSXHUss+9COoqu6JLmXU=", + "owner": "edolstra", + "repo": "flake-compat", + "rev": "009399224d5e398d03b22badca40a37ac85412a1", + "type": "github" + }, + "original": { + "owner": "edolstra", + "repo": "flake-compat", + "type": "github" + } + }, "futils": { "locked": { "lastModified": 1667395993, @@ -16,13 +32,34 @@ "type": "github" } }, + "gitignore": { + "inputs": { + "nixpkgs": [ + "pre-commit-hooks", + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1660459072, + "narHash": "sha256-8DFJjXG8zqoONA1vXtgeKXy68KdJL5UaXR8NtVMUbx8=", + "owner": "hercules-ci", + "repo": "gitignore.nix", + "rev": "a20de23b925fd8264fd7fad6454652e142fd7f73", + "type": "github" + }, + "original": { + "owner": "hercules-ci", + "repo": "gitignore.nix", + "type": "github" + } + }, "nixpkgs": { "locked": { - "lastModified": 1669001258, - "narHash": "sha256-fi0hCUCalMwd+RUi6zUBzNb0ShowaJk+o+p8qtF/Iv4=", + "lastModified": 1671997655, + "narHash": "sha256-8zUwvnJrBwiFIdw9VgARj1PIQsto5Spn9J5v34b0O7A=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "c6e5939c8fa2ab2230baf1378a34746e8db1aed7", + "rev": 
"aac1f0b25e6b04afad8e05dec5828f5c02398bd1", "type": "github" }, "original": { @@ -34,19 +71,24 @@ }, "pre-commit-hooks": { "inputs": { + "flake-compat": "flake-compat", "flake-utils": [ "futils" ], + "gitignore": "gitignore", "nixpkgs": [ "nixpkgs" + ], + "nixpkgs-stable": [ + "nixpkgs" ] }, "locked": { - "lastModified": 1669018323, - "narHash": "sha256-/2Ixw4v5JbbhH+sE6huvyG+txhBGIcx5iWIZ4kWtilU=", + "lastModified": 1672050129, + "narHash": "sha256-GBQMcvJUSwAVOpDjVKzB6D5mmHI7Y4nFw+04bnS9QrM=", "owner": "cachix", "repo": "pre-commit-hooks.nix", - "rev": "46fb5634676994bd333a94c8bd322eb1854ff223", + "rev": "67d98f02443b9928bc77f1267741dcfdd3d7b65c", "type": "github" }, "original": { diff --git a/flake.nix b/flake.nix index fa11cb8..3b97c36 100644 --- a/flake.nix +++ b/flake.nix @@ -24,6 +24,7 @@ inputs = { flake-utils.follows = "futils"; nixpkgs.follows = "nixpkgs"; + nixpkgs-stable.follows = "nixpkgs"; }; }; }; From c541a721f76e120a01031b1769661afb1f7d2527 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Thu, 16 Feb 2023 20:43:21 +0000 Subject: [PATCH 014/107] content: about: update current work situation --- content/about.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/content/about.md b/content/about.md index 86d96b3..04d7226 100644 --- a/content/about.md +++ b/content/about.md @@ -4,9 +4,10 @@ description: "About me" date: 2020-07-14 --- -I'm currently working as a backend engineer at [DGEX -Solutions](https://www.linkedin.com/company/dgex-solutions/), an offshoot of -[SNCF Réseau](https://www.sncf-reseau.com/), the French national railway entity. +I'm currently working as a Senior Software Engineer at [Google][google], as part +of their Embedded Graphics Drivers team for Pixel devices. + +[google]: https://www.linkedin.com/company/google/mycompany/verification/ You can find my CV [here](https://cv.belanyi.fr/en.pdf). Or [here](https://cv.belanyi.fr/fr.pdf) for the french version. 
From eaa10e471287be333c2ba692081c09173026d168 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Tue, 21 Mar 2023 19:49:41 +0000 Subject: [PATCH 015/107] config: add author mastodon --- config.yaml | 1 + layouts/partials/footer-extra.html | 1 + 2 files changed, 2 insertions(+) diff --git a/config.yaml b/config.yaml index 0efb236..1b25493 100644 --- a/config.yaml +++ b/config.yaml @@ -42,6 +42,7 @@ author: sourcehut: "ambroisie" linkedin: "bruno-belanyi" matrix: "@ambroisie:belanyi.fr" + mastodon: "nixos.paris/@ambroisie" permalinks: posts: /:year/:month/:day/:title/ diff --git a/layouts/partials/footer-extra.html b/layouts/partials/footer-extra.html index d6daaaf..23530b8 100644 --- a/layouts/partials/footer-extra.html +++ b/layouts/partials/footer-extra.html @@ -11,6 +11,7 @@ Sourcehut LinkedIn Matrix + Mastodon PGP

From 4a6a2440e1c51f299251a21cbf7c6c0dfb6d4610 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Fri, 24 Mar 2023 20:25:22 +0000 Subject: [PATCH 016/107] config: use lower-case usernames --- config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config.yaml b/config.yaml index 1b25493..c0927fc 100644 --- a/config.yaml +++ b/config.yaml @@ -37,8 +37,8 @@ menu: author: name: "Bruno BELANYI" email: "contact-blog@belanyi.fr" - github: "Ambroisie" - gitlab: "Ambroisie" + github: "ambroisie" + gitlab: "ambroisie" sourcehut: "ambroisie" linkedin: "bruno-belanyi" matrix: "@ambroisie:belanyi.fr" From f16929c2886268485858e2018083a62063edbb83 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Fri, 24 Mar 2023 20:30:02 +0000 Subject: [PATCH 017/107] posts: git-basics: fix typo --- content/posts/2020-12-07-git-basics/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/posts/2020-12-07-git-basics/index.md b/content/posts/2020-12-07-git-basics/index.md index daa3682..e39c8bb 100644 --- a/content/posts/2020-12-07-git-basics/index.md +++ b/content/posts/2020-12-07-git-basics/index.md @@ -317,7 +317,7 @@ easily choose which parts of your changes should end up in the same commit. Here's a list of commands that you should read-up on, but I won't be presenting further: -* `git bissect` +* `git bisect` * `git rerere` * `git stash` * and more... From 58eabe1745a3ddc6e22b6977708a7788d4d708bb Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sat, 1 Apr 2023 20:57:51 +0100 Subject: [PATCH 018/107] ci: add Woodpecker CI workflow This uses some trickery to make it DRY across 'dev' and 'prod' deployments, which only need to change a few lines. I'm surprised it even works! 
--- .woodpecker/deploy.yml | 66 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 .woodpecker/deploy.yml diff --git a/.woodpecker/deploy.yml b/.woodpecker/deploy.yml new file mode 100644 index 0000000..2a6ddb0 --- /dev/null +++ b/.woodpecker/deploy.yml @@ -0,0 +1,66 @@ +labels: + type: exec + +matrix: + include: + - TYPE: dev + MAKE_TARGET: build-dev + SSH_TARGET: ssh_target_dev + - TYPE: prod + MAKE_TARGET: build-prod + SSH_TARGET: ssh_target + +# Run the correct matrix build on the correct branch +when: + evaluate: | + CI_PIPELINE_EVENT in ["push", "cron", "deployment", "manual"] + and ((CI_COMMIT_BRANCH == CI_REPO_DEFAULT_BRANCH) == ("${TYPE}" == "prod")) + +pipeline: +- name: check + image: bash + commands: + - nix flake check + +- name: build (${TYPE}) + image: bash + commands: + # If dev, include drafts and future articles, change base URL + - nix develop -c make ${MAKE_TARGET} + +- name: deploy (${TYPE}) + image: bash + environment: + # Trailing slash to synchronize the folder's *content* to the target + - SYNC_SOURCE=public/ + secrets: + - source: ssh_key + target: sync_key + - source: ssh_port + target: sync_port + - source: ${SSH_TARGET} + target: sync_target + - source: ssh_user + target: sync_username + - source: ssh_host + target: sync_host + commands: + - "nix run github:ambroisie/nix-config#drone-rsync" + +- name: notify + image: bash + secrets: + - source: matrix_homeserver + target: address + - source: matrix_password + target: pass + - source: matrix_roomid + target: room + - source: matrix_username + target: user + commands: + - nix run github:ambroisie/matrix-notifier + when: + status: + - failure + - success From 898ec9b504913c0a4a80346b0cec88a9a74fea6c Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sat, 1 Apr 2023 21:04:58 +0100 Subject: [PATCH 019/107] ci: remove Drone CI --- .drone.jsonnet | 64 -------------------------------------------------- 1 file changed, 64 deletions(-) delete mode 
100644 .drone.jsonnet diff --git a/.drone.jsonnet b/.drone.jsonnet deleted file mode 100644 index 79e02bc..0000000 --- a/.drone.jsonnet +++ /dev/null @@ -1,64 +0,0 @@ -local Pipeline(isDev) = { - kind: "pipeline", - type: "exec", - name: if isDev then "Deploy to dev" else "Deploy to prod", - # Dev ignores "master", prod only triggers on "master" - trigger: { branch: { [if isDev then "exclude" else "include"]: [ "main" ] } }, - steps: [ - { - # We want to clone the submodules, which isn't done by default - name: "submodules", - commands: [ - "git submodule update --recursive --init", - ] - }, - { - # Include pre-commit checks, which include markdownlint - name: "check", - commands: [ - "nix flake check", - ], - }, - { - # If dev, include drafts and future articles, change base URL - name: "build", - commands: [ - "nix develop -c make " + if isDev then "build-dev" else "build-prod", - ], - }, - { - name: "deploy", - commands: [ - "nix run github:ambroisie/nix-config#drone-rsync", - ], - environment: { - # Trailing slash to synchronize the folder's *content* to the target - SYNC_SOURCE: "public/", - SYNC_HOST: { from_secret: "ssh_host" }, - SYNC_TARGET: { from_secret: "ssh_target" + if isDev then "_dev" else "" }, - SYNC_USERNAME: { from_secret: "ssh_user" }, - SYNC_KEY: { from_secret: "ssh_key" }, - SYNC_PORT: { from_secret: "ssh_port" }, - }, - }, - { - name: "notify", - commands: [ - "nix run github:ambroisie/matrix-notifier", - ], - environment: { - ADDRESS: { from_secret: "matrix_homeserver" }, - ROOM: { from_secret: "matrix_roomid" }, - USER: { from_secret: "matrix_username" }, - PASS: { from_secret: "matrix_password" }, - }, - when: { status: [ "failure", "success", ] }, - }, - ] -}; - - -[ - Pipeline(false), - Pipeline(true), -] From 4042c99629f96e985a77b87d3b0d97b5f431a63c Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sat, 1 Apr 2023 21:21:52 +0100 Subject: [PATCH 020/107] ci: fix cron deployments --- .woodpecker/deploy.yml | 10 ++++++++++ 1 file 
changed, 10 insertions(+) diff --git a/.woodpecker/deploy.yml b/.woodpecker/deploy.yml index 2a6ddb0..d20b541 100644 --- a/.woodpecker/deploy.yml +++ b/.woodpecker/deploy.yml @@ -1,3 +1,6 @@ +variables: +- &all_events ["push", "cron", "deployment", "manual"] + labels: type: exec @@ -21,12 +24,16 @@ pipeline: image: bash commands: - nix flake check + when: + event: *all_events - name: build (${TYPE}) image: bash commands: # If dev, include drafts and future articles, change base URL - nix develop -c make ${MAKE_TARGET} + when: + event: *all_events - name: deploy (${TYPE}) image: bash @@ -46,6 +53,8 @@ pipeline: target: sync_host commands: - "nix run github:ambroisie/nix-config#drone-rsync" + when: + event: *all_events - name: notify image: bash @@ -61,6 +70,7 @@ pipeline: commands: - nix run github:ambroisie/matrix-notifier when: + event: *all_events status: - failure - success From cb5579dc0203102c1dea202ec3a5a25243fa5bd2 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Tue, 18 Apr 2023 17:18:41 +0200 Subject: [PATCH 021/107] content: wish-list: remove iFixIt toolkit I finally bought it. --- content/wish-lists.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/content/wish-lists.md b/content/wish-lists.md index 703168f..0a65fa9 100644 --- a/content/wish-lists.md +++ b/content/wish-lists.md @@ -14,5 +14,3 @@ A few of my Amazon wish lists in case you want to give me a gift. 
* [Cooking & kitchenware](https://www.amazon.fr/hz/wishlist/ls/2MNRCLPNABZSU) * [Data Structures & Algorithms](https://www.amazon.fr/hz/wishlist/ls/2XZPQSBOGOFC3) * [Programming](https://www.amazon.fr/hz/wishlist/ls/1R4KFV4H2D8IF) -* Other items: - * [iFixit Pro Tech Toolit](https://store.ifixit.fr/products/pro-tech-toolkit) From 2b074ce20b6d64d7aa41950cfa1945ba9541151b Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sun, 30 Apr 2023 19:49:40 +0100 Subject: [PATCH 022/107] content: wish-lists: add chef's presses --- content/wish-lists.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/content/wish-lists.md b/content/wish-lists.md index 0a65fa9..c560e78 100644 --- a/content/wish-lists.md +++ b/content/wish-lists.md @@ -14,3 +14,5 @@ A few of my Amazon wish lists in case you want to give me a gift. * [Cooking & kitchenware](https://www.amazon.fr/hz/wishlist/ls/2MNRCLPNABZSU) * [Data Structures & Algorithms](https://www.amazon.fr/hz/wishlist/ls/2XZPQSBOGOFC3) * [Programming](https://www.amazon.fr/hz/wishlist/ls/1R4KFV4H2D8IF) +* Other items: + * [Chef's presses](https://www.thechefspress.com/shop) From e0716156e109322f794779d52449d84e6ec1e144 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sun, 30 Apr 2023 19:49:55 +0100 Subject: [PATCH 023/107] content: wish-lists: add lego bonsai --- content/wish-lists.md | 1 + 1 file changed, 1 insertion(+) diff --git a/content/wish-lists.md b/content/wish-lists.md index c560e78..c85b76c 100644 --- a/content/wish-lists.md +++ b/content/wish-lists.md @@ -16,3 +16,4 @@ A few of my Amazon wish lists in case you want to give me a gift. 
* [Programming](https://www.amazon.fr/hz/wishlist/ls/1R4KFV4H2D8IF) * Other items: * [Chef's presses](https://www.thechefspress.com/shop) + * [Lego bonsai tree](https://www.lego.com/en-gb/product/bonsai-tree-10281) From 102ce898b293dc2652b62ff774192ca94c522d11 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sun, 30 Jul 2023 14:27:16 +0100 Subject: [PATCH 024/107] ci: use 'backend' tag --- .woodpecker/deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.woodpecker/deploy.yml b/.woodpecker/deploy.yml index d20b541..78578a3 100644 --- a/.woodpecker/deploy.yml +++ b/.woodpecker/deploy.yml @@ -2,7 +2,7 @@ variables: - &all_events ["push", "cron", "deployment", "manual"] labels: - type: exec + backend: local matrix: include: From 00af2959d044bff46874d9169e215578aff49627 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Fri, 4 Aug 2023 23:18:24 +0100 Subject: [PATCH 025/107] nix: bump flake inputs --- flake.lock | 44 +++++++++++++++++++++++++++++++------------- flake.nix | 2 +- 2 files changed, 32 insertions(+), 14 deletions(-) diff --git a/flake.lock b/flake.lock index 840b160..eb36380 100644 --- a/flake.lock +++ b/flake.lock @@ -3,11 +3,11 @@ "flake-compat": { "flake": false, "locked": { - "lastModified": 1668681692, - "narHash": "sha256-Ht91NGdewz8IQLtWZ9LCeNXMSXHUss+9COoqu6JLmXU=", + "lastModified": 1673956053, + "narHash": "sha256-4gtG9iQuiKITOjNQQeQIpoIB6b16fm+504Ch3sNKLd8=", "owner": "edolstra", "repo": "flake-compat", - "rev": "009399224d5e398d03b22badca40a37ac85412a1", + "rev": "35bb57c0c8d8b62bbfd284272c928ceb64ddbde9", "type": "github" }, "original": { @@ -17,17 +17,20 @@ } }, "futils": { + "inputs": { + "systems": "systems" + }, "locked": { - "lastModified": 1667395993, - "narHash": "sha256-nuEHfE/LcWyuSWnS8t12N1wc105Qtau+/OdUAjtQ0rA=", + "lastModified": 1689068808, + "narHash": "sha256-6ixXo3wt24N/melDWjq70UuHQLxGV8jZvooRanIHXw0=", "owner": "numtide", "repo": "flake-utils", - "rev": "5aed5285a952e0b949eb3ba02c12fa4fcfef535f", + 
"rev": "919d646de7be200f3bf08cb76ae1f09402b6f9b4", "type": "github" }, "original": { "owner": "numtide", - "ref": "master", + "ref": "main", "repo": "flake-utils", "type": "github" } @@ -55,11 +58,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1671997655, - "narHash": "sha256-8zUwvnJrBwiFIdw9VgARj1PIQsto5Spn9J5v34b0O7A=", + "lastModified": 1691155369, + "narHash": "sha256-CIuJO5pgwCMsZM8flIU2OiZ79QfDCesXPsAiokCzlNM=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "aac1f0b25e6b04afad8e05dec5828f5c02398bd1", + "rev": "7d050b98e51cdbdd88ad960152d398d41c7ff5b4", "type": "github" }, "original": { @@ -84,11 +87,11 @@ ] }, "locked": { - "lastModified": 1672050129, - "narHash": "sha256-GBQMcvJUSwAVOpDjVKzB6D5mmHI7Y4nFw+04bnS9QrM=", + "lastModified": 1691093055, + "narHash": "sha256-sjNWYpDHc6vx+/M0WbBZKltR0Avh2S43UiDbmYtfHt0=", "owner": "cachix", "repo": "pre-commit-hooks.nix", - "rev": "67d98f02443b9928bc77f1267741dcfdd3d7b65c", + "rev": "ebb43bdacd1af8954d04869c77bc3b61fde515e4", "type": "github" }, "original": { @@ -104,6 +107,21 @@ "nixpkgs": "nixpkgs", "pre-commit-hooks": "pre-commit-hooks" } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } } }, "root": "root", diff --git a/flake.nix b/flake.nix index 3b97c36..c70daf0 100644 --- a/flake.nix +++ b/flake.nix @@ -6,7 +6,7 @@ type = "github"; owner = "numtide"; repo = "flake-utils"; - ref = "master"; + ref = "main"; }; nixpkgs = { From 9d5307f8cb3cddba9bb8600cfc052ebe3aa64427 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Fri, 13 Oct 2023 18:35:28 +0100 Subject: [PATCH 026/107] content: wish-lists: add chef's presses links --- content/wish-lists.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/content/wish-lists.md 
b/content/wish-lists.md index c85b76c..589f3da 100644 --- a/content/wish-lists.md +++ b/content/wish-lists.md @@ -16,4 +16,6 @@ A few of my Amazon wish lists in case you want to give me a gift. * [Programming](https://www.amazon.fr/hz/wishlist/ls/1R4KFV4H2D8IF) * Other items: * [Chef's presses](https://www.thechefspress.com/shop) + * [UK link](https://www.kitchenprovisions.co.uk/products/the-chefs-press-8oz-13oz) + * [FR link](https://www.thekitchenlab.fr/fr/p/la-presse-du-chef-poids-de-friture-bruce-hill-13-oz/) * [Lego bonsai tree](https://www.lego.com/en-gb/product/bonsai-tree-10281) From 3286e92f594db0082457be93aa638249ea728daf Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Fri, 13 Oct 2023 18:36:06 +0100 Subject: [PATCH 027/107] content: wish-lists: add knives --- content/wish-lists.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/content/wish-lists.md b/content/wish-lists.md index 589f3da..3b2aaf6 100644 --- a/content/wish-lists.md +++ b/content/wish-lists.md @@ -18,4 +18,8 @@ A few of my Amazon wish lists in case you want to give me a gift. 
* [Chef's presses](https://www.thechefspress.com/shop) * [UK link](https://www.kitchenprovisions.co.uk/products/the-chefs-press-8oz-13oz) * [FR link](https://www.thekitchenlab.fr/fr/p/la-presse-du-chef-poids-de-friture-bruce-hill-13-oz/) + * Knives + * [Gyuto, damascus finish, 210mm](https://www.kitchenprovisions.co.uk/collections/stainless-steel/products/gyuto-knife-vg10-damascus-finish-ohishi) + * [Gyuto, nashiji finish, 240mm](https://www.kitchenprovisions.co.uk/collections/kanehiro/products/gyuto-knife-ginsan-stainless-steel-nashiji-finish-kanehiro?variant=39743550718014) + * [Gyuto, polished finish, black ferrule 240mm](https://www.kitchenprovisions.co.uk/collections/sukenari/products/gyuto-knife-hap40-powder-steel-polished-finish-sukenari?variant=39743545016382) * [Lego bonsai tree](https://www.lego.com/en-gb/product/bonsai-tree-10281) From e2867851107aac5ad7c1e21f57e3bc1887322c97 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sat, 18 Nov 2023 14:18:02 +0000 Subject: [PATCH 028/107] content: wish-lists: remove a knife I bought it on sale. --- content/wish-lists.md | 1 - 1 file changed, 1 deletion(-) diff --git a/content/wish-lists.md b/content/wish-lists.md index 3b2aaf6..4f29bba 100644 --- a/content/wish-lists.md +++ b/content/wish-lists.md @@ -20,6 +20,5 @@ A few of my Amazon wish lists in case you want to give me a gift. 
* [FR link](https://www.thekitchenlab.fr/fr/p/la-presse-du-chef-poids-de-friture-bruce-hill-13-oz/) * Knives * [Gyuto, damascus finish, 210mm](https://www.kitchenprovisions.co.uk/collections/stainless-steel/products/gyuto-knife-vg10-damascus-finish-ohishi) - * [Gyuto, nashiji finish, 240mm](https://www.kitchenprovisions.co.uk/collections/kanehiro/products/gyuto-knife-ginsan-stainless-steel-nashiji-finish-kanehiro?variant=39743550718014) * [Gyuto, polished finish, black ferrule 240mm](https://www.kitchenprovisions.co.uk/collections/sukenari/products/gyuto-knife-hap40-powder-steel-polished-finish-sukenari?variant=39743545016382) * [Lego bonsai tree](https://www.lego.com/en-gb/product/bonsai-tree-10281) From d4bfa1bc740ff4304a0a7352afcf8c8b517d3104 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sat, 18 Nov 2023 20:02:59 +0000 Subject: [PATCH 029/107] ci: remove explicit events This should have been fixed upstream. So removing it and monitoring that it works. The main issue was with cron IIRC. 
--- .woodpecker/deploy.yml | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/.woodpecker/deploy.yml b/.woodpecker/deploy.yml index 78578a3..ac8f3fd 100644 --- a/.woodpecker/deploy.yml +++ b/.woodpecker/deploy.yml @@ -1,6 +1,3 @@ -variables: -- &all_events ["push", "cron", "deployment", "manual"] - labels: backend: local @@ -16,24 +13,19 @@ matrix: # Run the correct matrix build on the correct branch when: evaluate: | - CI_PIPELINE_EVENT in ["push", "cron", "deployment", "manual"] - and ((CI_COMMIT_BRANCH == CI_REPO_DEFAULT_BRANCH) == ("${TYPE}" == "prod")) + ((CI_COMMIT_BRANCH == CI_REPO_DEFAULT_BRANCH) == ("${TYPE}" == "prod")) pipeline: - name: check image: bash commands: - nix flake check - when: - event: *all_events - name: build (${TYPE}) image: bash commands: # If dev, include drafts and future articles, change base URL - nix develop -c make ${MAKE_TARGET} - when: - event: *all_events - name: deploy (${TYPE}) image: bash @@ -53,8 +45,6 @@ pipeline: target: sync_host commands: - "nix run github:ambroisie/nix-config#drone-rsync" - when: - event: *all_events - name: notify image: bash @@ -70,7 +60,6 @@ pipeline: commands: - nix run github:ambroisie/matrix-notifier when: - event: *all_events status: - failure - success From f7cb6b0444d947c0540e7da625bcf474e1481e5d Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sat, 18 Nov 2023 20:03:51 +0000 Subject: [PATCH 030/107] ci: remove deprecated syntax --- .woodpecker/deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.woodpecker/deploy.yml b/.woodpecker/deploy.yml index ac8f3fd..c646113 100644 --- a/.woodpecker/deploy.yml +++ b/.woodpecker/deploy.yml @@ -15,7 +15,7 @@ when: evaluate: | ((CI_COMMIT_BRANCH == CI_REPO_DEFAULT_BRANCH) == ("${TYPE}" == "prod")) -pipeline: +steps: - name: check image: bash commands: From f11a72ddfefdcc5f0124f0a5f001443c13a2413a Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Mon, 4 Dec 2023 22:01:15 +0000 Subject: [PATCH 031/107] 
content: wish-lists: add meat thermometer For some reason I thought it was only usable with an app, so I had dismissed it as a possibility. But no there is a display available, so it's actually a great option. Get a *little* sassy about the range extender, because it is truly not a necessary expense. --- content/wish-lists.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/content/wish-lists.md b/content/wish-lists.md index 4f29bba..2c62ef6 100644 --- a/content/wish-lists.md +++ b/content/wish-lists.md @@ -21,4 +21,6 @@ A few of my Amazon wish lists in case you want to give me a gift. * Knives * [Gyuto, damascus finish, 210mm](https://www.kitchenprovisions.co.uk/collections/stainless-steel/products/gyuto-knife-vg10-damascus-finish-ohishi) * [Gyuto, polished finish, black ferrule 240mm](https://www.kitchenprovisions.co.uk/collections/sukenari/products/gyuto-knife-hap40-powder-steel-polished-finish-sukenari?variant=39743545016382) + * [Combustion Inc thermometer and display](https://combustion.inc/products/predictive-thermometer-and-display) + * [Get the one with the range extender if you *really* want to spoil me](https://combustion.inc/products/predictive-thermometer-display) * [Lego bonsai tree](https://www.lego.com/en-gb/product/bonsai-tree-10281) From 584c1eff56a14c49d7cfc9da12528ed92906bcf0 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Wed, 14 Feb 2024 13:33:38 +0000 Subject: [PATCH 032/107] content: wish-lists: remove lego bonsai --- content/wish-lists.md | 1 - 1 file changed, 1 deletion(-) diff --git a/content/wish-lists.md b/content/wish-lists.md index 2c62ef6..5e3b735 100644 --- a/content/wish-lists.md +++ b/content/wish-lists.md @@ -23,4 +23,3 @@ A few of my Amazon wish lists in case you want to give me a gift. 
* [Gyuto, polished finish, black ferrule 240mm](https://www.kitchenprovisions.co.uk/collections/sukenari/products/gyuto-knife-hap40-powder-steel-polished-finish-sukenari?variant=39743545016382) * [Combustion Inc thermometer and display](https://combustion.inc/products/predictive-thermometer-and-display) * [Get the one with the range extender if you *really* want to spoil me](https://combustion.inc/products/predictive-thermometer-display) - * [Lego bonsai tree](https://www.lego.com/en-gb/product/bonsai-tree-10281) From e9940837813b2b70aa79e509ac22d0803a272b3a Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Mon, 24 Jun 2024 20:57:13 +0100 Subject: [PATCH 033/107] themes: anubis: bump submodule --- themes/anubis | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/themes/anubis b/themes/anubis index 10c340b..d77e0d6 160000 --- a/themes/anubis +++ b/themes/anubis @@ -1 +1 @@ -Subproject commit 10c340b0112b677acc5c8e60b6d9b7b6b4c91334 +Subproject commit d77e0d6d552ad135138f1f9443ef30cc5af8f0f3 From 39a5827d74152927cd5feef1b857e87709915486 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Tue, 25 Jun 2024 09:34:37 +0000 Subject: [PATCH 034/107] posts: git-basics: remove series --- content/posts/2020-12-07-git-basics/index.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/content/posts/2020-12-07-git-basics/index.md b/content/posts/2020-12-07-git-basics/index.md index e39c8bb..29187aa 100644 --- a/content/posts/2020-12-07-git-basics/index.md +++ b/content/posts/2020-12-07-git-basics/index.md @@ -8,8 +8,6 @@ tags: - cli categories: - programming -series: - - Git basics favorite: false --- From 2ca4a39106649170d7d4d89679096abea32bce93 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Mon, 24 Jun 2024 23:05:48 +0100 Subject: [PATCH 035/107] archetypes: add missing tags --- archetypes/default.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/archetypes/default.md b/archetypes/default.md index 3529484..12912b7 100644 --- a/archetypes/default.md +++ 
b/archetypes/default.md @@ -5,15 +5,18 @@ draft: false # I don't care for draft mode, git has branches for that description: "" tags: - accounting + - algorithms - c++ - ci/cd - cli + - data structures - design-pattern - docker - drone - git - hugo - nix + - python - self-hosting - test categories: From e9e5a3bca001f1ea261303b3d5e4eb2f95cc7b8a Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Mon, 24 Jun 2024 23:01:48 +0100 Subject: [PATCH 036/107] posts: add union-find --- content/posts/2024-06-24-union-find/index.md | 26 ++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 content/posts/2024-06-24-union-find/index.md diff --git a/content/posts/2024-06-24-union-find/index.md b/content/posts/2024-06-24-union-find/index.md new file mode 100644 index 0000000..930fcd9 --- /dev/null +++ b/content/posts/2024-06-24-union-find/index.md @@ -0,0 +1,26 @@ +--- +title: "Union Find" +date: 2024-06-24T21:07:49+01:00 +draft: false # I don't care for draft mode, git has branches for that +description: "My favorite data structure" +tags: + - algorithms + - data structures + - python +categories: + - programming +series: + - Cool algorithms +favorite: false +disable_feed: false +--- + +To kickoff the [series]({{< ref "/series/cool-algorithms/">}}) of posts about +algorithms and data structures I find interesting, I will be talking about my +favorite one: the [_Disjoint Set_][wiki]. Also known as the _Union-Find_ data +structure, so named because of its two main operations: `ds.union(lhs, rhs)` and +`ds.find(elem)`. 
+ +[wiki]: https://en.wikipedia.org/wiki/Disjoint-set_data_structure + + From 66006b33968a74ff8b4fd7d62719ea86300ca91e Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Mon, 24 Jun 2024 23:02:08 +0100 Subject: [PATCH 037/107] posts: union-find: add presentation --- content/posts/2024-06-24-union-find/index.md | 24 ++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/content/posts/2024-06-24-union-find/index.md b/content/posts/2024-06-24-union-find/index.md index 930fcd9..83722a9 100644 --- a/content/posts/2024-06-24-union-find/index.md +++ b/content/posts/2024-06-24-union-find/index.md @@ -24,3 +24,27 @@ structure, so named because of its two main operations: `ds.union(lhs, rhs)` and [wiki]: https://en.wikipedia.org/wiki/Disjoint-set_data_structure + +## What does it do? + +The _Union-Find_ data structure allows one to store a collection of sets of +elements, with operations for adding new sets, merging two sets into one, and +finding the representative member of a set. Not only does it do all that, but it +does it in almost constant (amortized) time! 
+ +Here is a small motivating example for using the _Disjoint Set_ data structure: + +```python +def connected_components(graph: Graph) -> list[set[Node]]: + # Initialize the disjoint set so that each node is in its own set + ds: DisjointSet[Node] = DisjointSet(graph.nodes) + # Each edge is a connection, merge both sides into the same set + for (start, dest) in graph.edges: + ds.union(start, dest) + # Connected components share the same (arbitrary) root + components: dict[Node, set[Node]] = defaultdict(set) + for n in graph.nodes: + components[ds.find(n)].add(n) + # Return a list of disjoint sets corresponding to each connected component + return list(components.values()) +``` From cc91e9eedd7b772f77129efcdd1b867f3f0b6985 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Mon, 24 Jun 2024 23:02:46 +0100 Subject: [PATCH 038/107] posts: union-find: add construction --- content/posts/2024-06-24-union-find/index.md | 30 ++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/content/posts/2024-06-24-union-find/index.md b/content/posts/2024-06-24-union-find/index.md index 83722a9..54b1151 100644 --- a/content/posts/2024-06-24-union-find/index.md +++ b/content/posts/2024-06-24-union-find/index.md @@ -48,3 +48,33 @@ def connected_components(graph: Graph) -> list[set[Node]]: # Return a list of disjoint sets corresponding to each connected component return list(components.values()) ``` + +## Implementation + +I will show how to implement `UnionFind` for integers, though it can easily be +extended to be used with arbitrary types (e.g: by mapping each element +one-to-one to a distinct integer, or using a different set representation). + +### Representation + +Creating a new disjoint set is easy enough: + +```python +class UnionFind: + _parent: list[int] + _rank: list[int] + + def __init__(self, size: int): + # Each node is in its own set, making it its own parent... + self._parent = list(range(size)) + # ...
And its rank 0 + self._rank = [0] * size +``` + +We represent each set through the `_parent` field: each element of the set is +linked to its parent, until the root node which is its own parent. When first +initializing the structure, each element is in its own set, so we initialize +each element to be a root and make it its own parent (`_parent[i] == i` for all +`i`). + +The `_rank` field is an optimization which we will touch on in a later section. From 60a8ea994a234b36b87c9525dad50e750514aeb7 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Mon, 24 Jun 2024 23:03:09 +0100 Subject: [PATCH 039/107] posts: union-find: add 'find' --- content/posts/2024-06-24-union-find/index.md | 31 ++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/content/posts/2024-06-24-union-find/index.md b/content/posts/2024-06-24-union-find/index.md index 54b1151..7039469 100644 --- a/content/posts/2024-06-24-union-find/index.md +++ b/content/posts/2024-06-24-union-find/index.md @@ -78,3 +78,34 @@ each element to be a root and make it its own parent (`_parent[i] == i` for all `i`). The `_rank` field is an optimization which we will touch on in a later section. + +### Find + +A naive implementation of `find(...)` is simple enough to write: + +```python +def find(self, elem: int) -> int: + # If `elem` is its own parent, then it is the root of the tree + if (parent := self._parent[elem]) == elem: + return elem + # Otherwise, recurse on the parent + return self.find(parent) +``` + +However, going back up the chain of parents each time we want to find the root +node (an `O(n)` operation) would make for disastrous performance. Instead we can +do a small optimization called _path splitting_.
+ +```python +def find(self, elem: int) -> int: + while (parent := self._parent[elem]) != elem: + # Replace each parent link by a link to the grand-parent + self._parent[elem], elem = self._parent[parent], parent + return elem +``` + +This flattens the chain so that each node links more directly to the root (the +length is reduced by half), making each subsequent `find(...)` faster. + +Other compression schemes exist, along the spectrum between shortening +the chain faster earlier, and updating `_parent` fewer times per `find(...)`. From 8dc12214756680923e4fec52e7e7da64a6779f89 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Mon, 24 Jun 2024 23:03:24 +0100 Subject: [PATCH 040/107] posts: union-find: add 'union' --- content/posts/2024-06-24-union-find/index.md | 43 ++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/content/posts/2024-06-24-union-find/index.md b/content/posts/2024-06-24-union-find/index.md index 7039469..2fe1570 100644 --- a/content/posts/2024-06-24-union-find/index.md +++ b/content/posts/2024-06-24-union-find/index.md @@ -109,3 +109,46 @@ length is reduced by half), making each subsequent `find(...)` faster. Other compression schemes exist, along the spectrum between shortening the chain faster earlier, and updating `_parent` fewer times per `find(...)`. + +### Union + +A naive implementation of `union(...)` is simple enough to write: + +```python +def union(self, lhs: int, rhs: int) -> int: + # Replace both elements by their root parent + lhs = self.find(lhs) + rhs = self.find(rhs) + # Arbitrarily merge one into the other + self._parent[rhs] = lhs + # Return the new root + return lhs +``` + +Once again, improvements can be made. Depending on the order in which we call +`union(...)`, we might end up creating a long chain from the leaf of the tree to +the root node, leading to slower `find(...)` operations. If at all possible, we +would like to keep the trees as shallow as possible.
+ +To do so, we want to avoid merging taller trees into smaller ones, so as to keep +them as balanced as possible. Since a higher tree will result in a slower +`find(...)`, keeping the trees balanced will lead to increased performance. + +This is where the `_rank` field we mentioned earlier comes in: the _rank_ of an +element is an upper bound on its height in the tree. By keeping track of this +_approximate_ height, we can keep the trees balanced when merging them. + +```python +def union(self, lhs: int, rhs: int) -> int: + lhs = self.find(lhs) + rhs = self.find(rhs) + # Always keep `lhs` as the taller tree + if self._rank[lhs] < self._rank[rhs]: + lhs, rhs = rhs, lhs + # Merge the smaller tree into the taller one + self._parent[rhs] = lhs + # Update the rank when merging trees of approximately the same size + if self._rank[lhs] == self._rank[rhs]: + self._rank[lhs] += 1 + return lhs +``` From 03866def1d5c07af0f4b426f00169357b8dfa2d6 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Wed, 26 Jun 2024 13:43:55 +0000 Subject: [PATCH 041/107] posts: polymorphic-flyweight: fix typo --- content/posts/2020-07-22-polymorphic-flyweight-cpp/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/posts/2020-07-22-polymorphic-flyweight-cpp/index.md b/content/posts/2020-07-22-polymorphic-flyweight-cpp/index.md index 5ff4b1f..2311002 100644 --- a/content/posts/2020-07-22-polymorphic-flyweight-cpp/index.md +++ b/content/posts/2020-07-22-polymorphic-flyweight-cpp/index.md @@ -68,7 +68,7 @@ public: const std::type_index lhs_i(lhs); const std::type_index rhs_i(rhs); if (lhs_i != rhs_i) - returh lhs_i < rhs_i; + return lhs_i < rhs_i; // We are now assured that both classes have the same type return less_than(rhs); } From 23a2d4d24933830c882c3b1966c2076c567603ee Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Wed, 26 Jun 2024 13:44:23 +0000 Subject: [PATCH 042/107] posts: generic-flyweight: fix typo ---
content/posts/2020-07-16-generic-flyweight-cpp/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/posts/2020-07-16-generic-flyweight-cpp/index.md b/content/posts/2020-07-16-generic-flyweight-cpp/index.md index 303b3db..3ca1e3a 100644 --- a/content/posts/2020-07-16-generic-flyweight-cpp/index.md +++ b/content/posts/2020-07-16-generic-flyweight-cpp/index.md @@ -16,7 +16,7 @@ favorite: false The flyweight is a well-known [GoF](https://en.wikipedia.org/wiki/Design_Patterns) design pattern. -It's intent is to minimize memory usage by reducing the number of instantiations +Its intent is to minimize memory usage by reducing the number of instantiations of a given object. I will show you how to implement a robust flyweight in C++, as well as a way to From adc514215c17c6ef1e4bb94a9b591f67237b08f5 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Thu, 27 Jun 2024 10:41:25 +0000 Subject: [PATCH 043/107] ci: remove deprecated syntax --- .woodpecker/deploy.yml | 43 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/.woodpecker/deploy.yml b/.woodpecker/deploy.yml index c646113..a6a8b0f 100644 --- a/.woodpecker/deploy.yml +++ b/.woodpecker/deploy.yml @@ -30,33 +30,32 @@ steps: - name: deploy (${TYPE}) image: bash environment: - # Trailing slash to synchronize the folder's *content* to the target - - SYNC_SOURCE=public/ - secrets: - - source: ssh_key - target: sync_key - - source: ssh_port - target: sync_port - - source: ${SSH_TARGET} - target: sync_target - - source: ssh_user - target: sync_username - - source: ssh_host - target: sync_host + # Trailing slash to synchronize the folder's *content* to the target + SYNC_SOURCE: public/ + SYNC_KEY: + from_secret: ssh_key + SYNC_PORT: + from_secret: ssh_port + SYNC_TARGET: + from_secret: ${SSH_TARGET} + SYNC_USERNAME: + from_secret: ssh_user + SYNC_HOST: + from_secret: ssh_host commands: - "nix run github:ambroisie/nix-config#drone-rsync" - name:
notify image: bash - secrets: - - source: matrix_homeserver - target: address - - source: matrix_password - target: pass - - source: matrix_roomid - target: room - - source: matrix_username - target: user + environment: + ADDRESS: + from_secret: matrix_homeserver + ROOM: + from_secret: matrix_roomid + USER: + from_secret: matrix_username + PASS: + from_secret: matrix_password commands: - nix run github:ambroisie/matrix-notifier when: From 7873828c4c523f98104767a9f1fc70e69611e00f Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Thu, 27 Jun 2024 13:16:44 +0000 Subject: [PATCH 044/107] posts: union-find: fix union of same root --- content/posts/2024-06-24-union-find/index.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/content/posts/2024-06-24-union-find/index.md b/content/posts/2024-06-24-union-find/index.md index 2fe1570..7c9435c 100644 --- a/content/posts/2024-06-24-union-find/index.md +++ b/content/posts/2024-06-24-union-find/index.md @@ -142,6 +142,9 @@ _approximate_ height, we can keep the trees balanced when merging them.
def union(self, lhs: int, rhs: int) -> int: lhs = self.find(lhs) rhs = self.find(rhs) + # Bail out early if they already belong to the same set + if lhs == rhs: + return lhs # Always keep `lhs` as the taller tree if self._rank[lhs] < self._rank[rhs]: lhs, rhs = rhs, lhs From 1cde7ff5397bad306c9806aa2ff4214930d29270 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sun, 30 Jun 2024 12:34:08 +0100 Subject: [PATCH 045/107] themes: hugo-atom-feed: bump submodule --- themes/hugo-atom-feed | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/themes/hugo-atom-feed b/themes/hugo-atom-feed index 5da913d..d545eff 160000 --- a/themes/hugo-atom-feed +++ b/themes/hugo-atom-feed @@ -1 +1 @@ -Subproject commit 5da913dc46d2dadcaf6548256238c58c504476de +Subproject commit d545effed9949bf834eaed09ad423ec3e030794f From 8e5ac9d1e69de327f6e3b5c3bf2873be329d537b Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sun, 30 Jun 2024 12:35:08 +0100 Subject: [PATCH 046/107] posts: add trie --- content/posts/2024-06-30-trie/index.md | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 content/posts/2024-06-30-trie/index.md diff --git a/content/posts/2024-06-30-trie/index.md b/content/posts/2024-06-30-trie/index.md new file mode 100644 index 0000000..a4e8959 --- /dev/null +++ b/content/posts/2024-06-30-trie/index.md @@ -0,0 +1,23 @@ +--- +title: "Trie" +date: 2024-06-30T11:07:49+01:00 +draft: false # I don't care for draft mode, git has branches for that +description: "A cool map" +tags: + - algorithms + - data structures + - python +categories: + - programming +series: + - Cool algorithms +favorite: false +disable_feed: false +--- + +This time, let's talk about the [_Trie_][wiki], which is a tree-based mapping +structure most often used for string keys.
+ +[wiki]: https://en.wikipedia.org/wiki/Trie + + From 53b968e36ce899aaf64537ccb15492bee9c10319 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sun, 30 Jun 2024 12:36:17 +0100 Subject: [PATCH 047/107] posts: trie: add presentation --- content/posts/2024-06-30-trie/index.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/content/posts/2024-06-30-trie/index.md b/content/posts/2024-06-30-trie/index.md index a4e8959..32b6fb8 100644 --- a/content/posts/2024-06-30-trie/index.md +++ b/content/posts/2024-06-30-trie/index.md @@ -21,3 +21,16 @@ structure most often used for string keys. [wiki]: https://en.wikipedia.org/wiki/Trie + +## What does it do? + +A _Trie_ can be used to map a set of string keys to their corresponding values, +without the need for a hash function. This also means you won't suffer from hash +collisions, though the tree-based structure will probably translate to slower +performance than a good hash table. + +A _Trie_ is especially useful to represent a dictionary of words in the case of +spell correction, as it can easily be used to fuzzy match words under a given +edit distance (think [Levenshtein distance]) + +[Levenshtein distance]: https://en.wikipedia.org/wiki/Levenshtein_distance From 674410694028d4e6413d2c82a638b1d755e024de Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sun, 30 Jun 2024 12:36:42 +0100 Subject: [PATCH 048/107] posts: trie: add construction --- content/posts/2024-06-30-trie/index.md | 29 ++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/content/posts/2024-06-30-trie/index.md b/content/posts/2024-06-30-trie/index.md index 32b6fb8..584e856 100644 --- a/content/posts/2024-06-30-trie/index.md +++ b/content/posts/2024-06-30-trie/index.md @@ -34,3 +34,32 @@ spell correction, as it can easily be used to fuzzy match words under a given edit distance (think [Levenshtein distance]) [Levenshtein distance]: https://en.wikipedia.org/wiki/Levenshtein_distance + +## Implementation + +This 
implementation will be in Python for exposition purposes, even though +it already has a built-in `dict`. + +### Representation + +Creating a new `Trie` is easy: the root node starts off empty and without any +mapped values. + +```python +class Trie[T]: + _children: dict[str, Trie[T]] + _value: T | None + + def __init__(self): + # Each letter is mapped to a Trie + self._children = defaultdict(Trie) + # If we match a full string, we store the mapped value + self._value = None +``` + +We're using a `defaultdict` for the children for ease of implementation in this +post. In reality, I would encourage you to exit early when you can't match a given +character. + +The string key will be implicit by the position of a node in the tree: the empty +string at the root, one-character strings as its direct children, etc... From 7e9fd69cced39d50ff0195a85ec33d19a391b18c Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sun, 30 Jun 2024 12:37:04 +0100 Subject: [PATCH 049/107] posts: trie: add search --- content/posts/2024-06-30-trie/index.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/content/posts/2024-06-30-trie/index.md b/content/posts/2024-06-30-trie/index.md index 584e856..968aa0e 100644 --- a/content/posts/2024-06-30-trie/index.md +++ b/content/posts/2024-06-30-trie/index.md @@ -63,3 +63,18 @@ character. The string key will be implicit by the position of a node in the tree: the empty string at the root, one-character strings as its direct children, etc... + +### Search + +An exact match look-up is easily done: we go down the tree until we've exhausted +the key. At that point we've either found a mapped value or not. + +```python +def get(self, key: str) -> T | None: + # Have we matched the full key?
+ if not key: + # Store the `T` if mapped, `None` otherwise + return self._value + # Otherwise, recurse on the child corresponding to the first letter + return self._children[key[0]].get(key[1:]) +``` From 55982909d2ce8b7f3cc67de3d97f1982316ecc41 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sun, 30 Jun 2024 12:37:21 +0100 Subject: [PATCH 050/107] posts: trie: add insertion --- content/posts/2024-06-30-trie/index.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/content/posts/2024-06-30-trie/index.md b/content/posts/2024-06-30-trie/index.md index 968aa0e..b8e4679 100644 --- a/content/posts/2024-06-30-trie/index.md +++ b/content/posts/2024-06-30-trie/index.md @@ -78,3 +78,22 @@ def get(self, key: str) -> T | None: # Otherwise, recurse on the child corresponding to the first letter return self._children[key[0]].get(key[1:]) ``` + +### Insertion + +Adding a new value to the _Trie_ is similar to a key lookup, only this time we +store the new value instead of returning it. + +```python +def insert(self, key: str, value: T) -> bool: + # Have we matched the full key? 
+ if not key: + # Check whether we're overwriting a previous mapping + was_mapped = self._value is not None + # Store the corresponding value + self._value = value + # Return whether we've performed an overwrite + return was_mapped + # Otherwise, recurse on the child corresponding to the first letter + return self._children[key[0]].insert(key[1:], value) +``` From 1d37e00b3a9afe578ef4441e0b49ac375dc62a03 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sun, 30 Jun 2024 12:37:48 +0100 Subject: [PATCH 051/107] posts: trie: add removal --- content/posts/2024-06-30-trie/index.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/content/posts/2024-06-30-trie/index.md b/content/posts/2024-06-30-trie/index.md index b8e4679..2a0d77e 100644 --- a/content/posts/2024-06-30-trie/index.md +++ b/content/posts/2024-06-30-trie/index.md @@ -97,3 +97,20 @@ def insert(self, key: str, value: T) -> bool: # Otherwise, recurse on the child corresponding to the first letter return self._children[key[0]].insert(key[1:], value) ``` + +### Removal + +Removal should also look familiar. + +```python +def remove(self, key: str) -> bool: + # Have we matched the full key?
+ if not key: + was_mapped = self._value is not None + # Remove the value + self._value = None + # Return whether it was mapped + return was_mapped + # Otherwise, recurse on the child corresponding to the first letter + return self._children[key[0]].remove(key[1:]) +``` From a0e20dd341261cb66ad231bf6cddf92e35b96f24 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sun, 30 Jun 2024 12:38:01 +0100 Subject: [PATCH 052/107] posts: trie: add fuzzy matching --- content/posts/2024-06-30-trie/index.md | 55 ++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/content/posts/2024-06-30-trie/index.md b/content/posts/2024-06-30-trie/index.md index 2a0d77e..aef49e3 100644 --- a/content/posts/2024-06-30-trie/index.md +++ b/content/posts/2024-06-30-trie/index.md @@ -114,3 +114,58 @@ def remove(self, key: str) -> bool: # Otherwise, recurse on the child corresponding to the first letter return self._children[key[0]].remove(key[1:]) ``` + +### Fuzzy matching + +Fuzzily matching a given word is where the real difficulty is: the key is to +realize we can use the prefix-tree nature of a _Trie_ to avoid doing wasteful +work. + +By leveraging the prefix visit order of the tree, we can build an iterative +Levenshtein distance matrix, in much the same way one would do so in its +[Dynamic Programming] implementation (see the [Wagner-Fischer algorithm]).
+ +[Dynamic Programming]: https://en.wikipedia.org/wiki/Dynamic_programming +[Wagner-Fischer algorithm]: https://en.wikipedia.org/wiki/Wagner%E2%80%93Fischer_algorithm + +```python +class FuzzyResult[T](NamedTuple): + distance: int + key: str + value: T + + +def get_fuzzy(self, key: str, max_distance: int = 0) -> Iterator[FuzzyResult[T]]: + def helper( + current_word: str, + node: Trie[T], + previous_row: list[int], + ) -> Iterator[FuzzyResult[T]]: + # Iterative Levenshtein + current_row = [previous_row[0] + 1] + current_char = current_word[-1] + for column, key_char in enumerate(key, start=1): + insertion = current_row[column - 1] + 1 + deletion = previous_row[column] + 1 + replacement = previous_row[column - 1] + (key_char != current_char) + current_row.append(min(insertion, deletion, replacement)) + + # If we are under the max distance, match this node + if (distance := current_row[-1]) <= max_distance and node._value != None: + # Only if it has a value of course + yield FuzzyResult(distance, current_word, node._value) + + # If we can potentially still match children, recurse + if min(current_row) <= max_distance: + for c, child in node._children.items(): + yield from helper(current_word + c, child, current_row) + + # Build the first row -- the edit distance from the empty string + row = list(range(len(key) + 1)) + + # Base case for the empty string + if (distance := row[-1]) <= max_distance and self._value != None: + yield FuzzyResult(distance, "", self._value) + for c, child in self._children.items(): + yield from helper(c, child, row) +``` From f2fa93ad8bf4eacd3cda82d638a0f63adfd2c1ba Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sat, 6 Jul 2024 23:33:47 +0100 Subject: [PATCH 053/107] posts: add gap-buffer --- content/posts/2024-07-06-gap-buffer/index.md | 25 ++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 content/posts/2024-07-06-gap-buffer/index.md diff --git a/content/posts/2024-07-06-gap-buffer/index.md
b/content/posts/2024-07-06-gap-buffer/index.md new file mode 100644 index 0000000..fa8f1c5 --- /dev/null +++ b/content/posts/2024-07-06-gap-buffer/index.md @@ -0,0 +1,25 @@ +--- +title: "Gap Buffer" +date: 2024-07-06T21:27:19+01:00 +draft: false # I don't care for draft mode, git has branches for that +description: "As featured in GNU Emacs" +tags: + - algorithms + - data structures + - python +categories: + - programming +series: + - Cool algorithms +favorite: false +disable_feed: false +--- + +The [_Gap Buffer_][wiki] is a popular data structure for text editors to +represent files and editable buffers. The most famous of them probably being +[GNU Emacs][emacs]. + +[wiki]: https://en.wikipedia.org/wiki/Gap_buffer +[emacs]: https://www.gnu.org/software/emacs/manual/html_node/elisp/Buffer-Gap.html + + From 51a1bd01cd98daca2c3f374d7d2cbe96f88e4c0c Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sat, 6 Jul 2024 23:34:49 +0100 Subject: [PATCH 054/107] posts: gap-buffer: add presentation --- content/posts/2024-07-06-gap-buffer/index.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/content/posts/2024-07-06-gap-buffer/index.md b/content/posts/2024-07-06-gap-buffer/index.md index fa8f1c5..d13ef4e 100644 --- a/content/posts/2024-07-06-gap-buffer/index.md +++ b/content/posts/2024-07-06-gap-buffer/index.md @@ -23,3 +23,13 @@ represent files and editable buffers. The most famous of them probably being [emacs]: https://www.gnu.org/software/emacs/manual/html_node/elisp/Buffer-Gap.html + +## What does it do? + +A _Gap Buffer_ is simply a list of characters, similar to a normal string, with +the added twist of splitting it into two sides: the prefix and suffix, on either +side of the cursor. In between them, a gap is left to allow for quick +insertion at the cursor. + +Moving the cursor moves the gap around the buffer, the prefix and suffix getting +shorter/longer as required.
From a9f003f4ee168705762a503fa7762059e6c16ec5 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sat, 6 Jul 2024 23:35:39 +0100 Subject: [PATCH 055/107] posts: gap-buffer: add construction --- content/posts/2024-07-06-gap-buffer/index.md | 39 ++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/content/posts/2024-07-06-gap-buffer/index.md b/content/posts/2024-07-06-gap-buffer/index.md index d13ef4e..db5d92b 100644 --- a/content/posts/2024-07-06-gap-buffer/index.md +++ b/content/posts/2024-07-06-gap-buffer/index.md @@ -33,3 +33,42 @@ insertion at the cursor. Moving the cursor moves the gap around the buffer, the prefix and suffix getting shorter/longer as required. + +## Implementation + +I'll be writing a sample implementation in Python, as with the rest of the +[series]({{< ref "/series/cool-algorithms/">}}). I don't think it showcases the +elegance of the _Gap Buffer_ in action like a C implementation full of +`memmove`s would, but it does make it short and sweet. + +### Representation + +We'll be representing the gap buffer as an actual list of characters. + +Given that Python doesn't _have_ characters, let's settle for a list of strings, +each representing a single character... + +```python +Char = str + +class GapBuffer: + # List of characters, contains prefix and suffix of string with gap in the middle + _buf: list[Char] + # The gap is contained between [start, end) (i.e: buf[start:end]) + _gap_start: int + _gap_end: int + + # Visual representation of the gap buffer: + # This is a very [ ]long string.
+ # |<----------------------------------------------->| capacity + # |<------------>| |<-------->| string + # |<------------------->| gap + # |<------------>| prefix + # |<-------->| suffix + def __init__(self, initial_capacity: int = 16) -> None: + assert initial_capacity > 0 + # Initialize an empty gap buffer + self._buf = [""] * initial_capacity + self._gap_start = 0 + self._gap_end = initial_capacity +``` From 408b74daf7d0d43907aa33eb1aea3bd961200ab7 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sat, 6 Jul 2024 23:36:02 +0100 Subject: [PATCH 056/107] posts: gap-buffer: add accessors --- content/posts/2024-07-06-gap-buffer/index.md | 27 ++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/content/posts/2024-07-06-gap-buffer/index.md b/content/posts/2024-07-06-gap-buffer/index.md index db5d92b..1071a24 100644 --- a/content/posts/2024-07-06-gap-buffer/index.md +++ b/content/posts/2024-07-06-gap-buffer/index.md @@ -72,3 +72,30 @@ class GapBuffer: self._gap_start = 0 self._gap_end = initial_capacity ``` + +### Accessors + +I'm mostly adding these for exposition, and making it easier to write `assert`s +later. 
+ +```python +@property +def capacity(self) -> int: + return len(self._buf) + +@property +def gap_length(self) -> int: + return self._gap_end - self._gap_start + +@property +def string_length(self) -> int: + return self.capacity - self.gap_length + +@property +def prefix_length(self) -> int: + return self._gap_start + +@property +def suffix_length(self) -> int: + return self.capacity - self._gap_end +``` From 4da83c971621a87804b5dec161e96f09eb99844a Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sun, 14 Jul 2024 17:53:25 +0100 Subject: [PATCH 057/107] posts: add bloom-filter --- .../posts/2024-07-14-bloom-filter/index.md | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 content/posts/2024-07-14-bloom-filter/index.md diff --git a/content/posts/2024-07-14-bloom-filter/index.md b/content/posts/2024-07-14-bloom-filter/index.md new file mode 100644 index 0000000..690f1b7 --- /dev/null +++ b/content/posts/2024-07-14-bloom-filter/index.md @@ -0,0 +1,26 @@ +--- +title: "Bloom Filter" +date: 2024-07-14T17:46:40+01:00 +draft: false # I don't care for draft mode, git has branches for that +description: "Probably cool" +tags: + - algorithms + - data structures + - python +categories: + - programming +series: + - Cool algorithms +favorite: false +disable_feed: false +--- + +The [_Bloom Filter_][wiki] is a probabilistic data structure for set membership. + +The filter can be used as an inexpensive first step when querying the actual +data is quite costly (e.g: as a first check for expensive cache lookups or large +data seeks). 
+ +[wiki]: https://en.wikipedia.org/wiki/Bloom_filter + + From 06c4a03a42b2e61684830fb115270fd152c17cfe Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sat, 6 Jul 2024 23:36:20 +0100 Subject: [PATCH 058/107] posts: gap-buffer: add growth --- content/posts/2024-07-06-gap-buffer/index.md | 22 ++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/content/posts/2024-07-06-gap-buffer/index.md b/content/posts/2024-07-06-gap-buffer/index.md index 1071a24..5e5cd4c 100644 --- a/content/posts/2024-07-06-gap-buffer/index.md +++ b/content/posts/2024-07-06-gap-buffer/index.md @@ -99,3 +99,25 @@ def prefix_length(self) -> int: def suffix_length(self) -> int: return self.capacity - self._gap_end ``` + +### Growing the buffer + +I've written this method in a somewhat non-idiomatic manner, to make it closer +to how it would look in C using `realloc` instead. + +It would be more efficient to use slicing to insert the needed extra capacity +directly, instead of making a new buffer and copying characters over. 
+ +```python +def grow(self, capacity: int) -> None: + assert capacity >= self.capacity + # Create a new buffer with the new capacity + new_buf = [""] * capacity + # Move the prefix/suffix to their place in the new buffer + added_capacity = capacity - len(self._buf) + new_buf[: self._gap_start] = self._buf[: self._gap_start] + new_buf[self._gap_end + added_capacity :] = self._buf[self._gap_end :] + # Use the new buffer, account for added capacity + self._buf = new_buf + self._gap_end += added_capacity +``` From 4abcd27ee7cec11d9e204c22e047d21dc2c907c0 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sun, 14 Jul 2024 17:54:59 +0100 Subject: [PATCH 059/107] posts: bloom-filter: add presentation --- content/posts/2024-07-14-bloom-filter/index.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/content/posts/2024-07-14-bloom-filter/index.md b/content/posts/2024-07-14-bloom-filter/index.md index 690f1b7..717436f 100644 --- a/content/posts/2024-07-14-bloom-filter/index.md +++ b/content/posts/2024-07-14-bloom-filter/index.md @@ -24,3 +24,16 @@ data seeks). [wiki]: https://en.wikipedia.org/wiki/Bloom_filter + +## What does it do? + +A _Bloom Filter_ can be understood as a hash-set which can either tell you: + +* An element is _not_ part of the set. +* An element _may be_ part of the set. + +More specifically, one can tweak the parameters of the filter to make it so that +the _false positive_ rate of membership is quite low. + +I won't be going into those calculations here, but they are quite trivial to +compute, or one can just look up appropriate values for their use case. 
From dbbcd528c3e5292cdd3548ee22c0a15408344079 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sat, 6 Jul 2024 23:36:33 +0100 Subject: [PATCH 060/107] posts: gap-buffer: add insertion --- content/posts/2024-07-06-gap-buffer/index.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/content/posts/2024-07-06-gap-buffer/index.md b/content/posts/2024-07-06-gap-buffer/index.md index 5e5cd4c..8c13eb1 100644 --- a/content/posts/2024-07-06-gap-buffer/index.md +++ b/content/posts/2024-07-06-gap-buffer/index.md @@ -121,3 +121,22 @@ def grow(self, capacity: int) -> None: self._buf = new_buf self._gap_end += added_capacity ``` + +### Insertion + +Inserting text at the cursor's position means filling up the gap in the middle +of the buffer. To do so we must first make sure that the gap is big enough, or +grow the buffer accordingly. + +Then inserting the text is simply a matter of copying its characters in place, +and moving the start of the gap further right. + +```python +def insert(self, val: str) -> None: + # Ensure we have enough space to insert the whole string + if len(val) > self.gap_length: + self.grow(max(self.capacity * 2, self.string_length + len(val))) + # Fill the gap with the given string + self._buf[self._gap_start : self._gap_start + len(val)] = val + self._gap_start += len(val) +``` From 84ce6ea494d638c8910820e58d8e6d3b3bf9e745 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sun, 14 Jul 2024 17:55:15 +0100 Subject: [PATCH 061/107] posts: bloom-filter: add construction --- .../posts/2024-07-14-bloom-filter/index.md | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/content/posts/2024-07-14-bloom-filter/index.md b/content/posts/2024-07-14-bloom-filter/index.md index 717436f..7064864 100644 --- a/content/posts/2024-07-14-bloom-filter/index.md +++ b/content/posts/2024-07-14-bloom-filter/index.md @@ -37,3 +37,28 @@ the _false positive_ rate of membership is quite low.
I won't be going into those calculations here, but they are quite trivial to compute, or one can just look up appropriate values for their use case. + +## Implementation + +I'll be using Python, which has the nifty ability of representing bitsets +through its built-in big integers quite easily. + +We'll be assuming a `BIT_COUNT` of 64 here, but the implementation can easily be +tweaked to use a different number, or even change it at construction time. + +### Representation + +A `BloomFilter` is just a set of bits and a list of hash functions. + +```python +BIT_COUNT = 64 + +class BloomFilter[T]: + _bits: int + _hash_functions: list[Callable[[T], int]] + + def __init__(self, hash_functions: list[Callable[[T], int]]) -> None: + # Filter is initially empty + self._bits = 0 + self._hash_functions = hash_functions +``` From 11138dafd16fd8eae9d0b6f2764faa22d80e9100 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sat, 6 Jul 2024 23:36:46 +0100 Subject: [PATCH 062/107] posts: gap-buffer: add deletion --- content/posts/2024-07-06-gap-buffer/index.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/content/posts/2024-07-06-gap-buffer/index.md b/content/posts/2024-07-06-gap-buffer/index.md index 8c13eb1..929955e 100644 --- a/content/posts/2024-07-06-gap-buffer/index.md +++ b/content/posts/2024-07-06-gap-buffer/index.md @@ -140,3 +140,22 @@ def insert(self, val: str) -> None: self._buf[self._gap_start : self._gap_start + len(val)] = val self._gap_start += len(val) ``` + +### Deletion + +Removing text from the buffer simply expands the gap in the corresponding +direction, shortening the string's prefix/suffix. This makes it very cheap. + +The methods are named after the `backspace` and `delete` keys on the keyboard. 
+ +```python +def backspace(self, dist: int = 1) -> None: + assert dist <= self.prefix_length + # Extend gap to the left + self._gap_start -= dist + +def delete(self, dist: int = 1) -> None: + assert dist <= self.suffix_length + # Extend gap to the right + self._gap_end += dist +``` From 114ca1de50360832418a701b56d1b4b7e6012af7 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sun, 14 Jul 2024 17:55:33 +0100 Subject: [PATCH 063/107] posts: bloom-filter: add insertion --- content/posts/2024-07-14-bloom-filter/index.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/content/posts/2024-07-14-bloom-filter/index.md b/content/posts/2024-07-14-bloom-filter/index.md index 7064864..7c02140 100644 --- a/content/posts/2024-07-14-bloom-filter/index.md +++ b/content/posts/2024-07-14-bloom-filter/index.md @@ -62,3 +62,18 @@ class BloomFilter[T]: self._bits = 0 self._hash_functions = hash_functions ``` + +### Inserting a key + +To add an element to the filter, we take the output from each hash function and +use that to set a bit in the filter. This combination of bits will identify the +element, which we can use for lookup later.
+ +```python +def insert(self, val: T) -> None: + # Iterate over each hash + for f in self._hash_functions: + n = f(val) % BIT_COUNT + # Set the corresponding bit + self._bits |= 1 << n +``` From e8acb49b53d3dc0e17d606c38f699bfdae7a4edf Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sat, 6 Jul 2024 23:41:31 +0100 Subject: [PATCH 064/107] posts: gap-buffer: add movement --- content/posts/2024-07-06-gap-buffer/index.md | 30 ++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/content/posts/2024-07-06-gap-buffer/index.md b/content/posts/2024-07-06-gap-buffer/index.md index 929955e..a9aac96 100644 --- a/content/posts/2024-07-06-gap-buffer/index.md +++ b/content/posts/2024-07-06-gap-buffer/index.md @@ -159,3 +159,33 @@ def delete(self, dist: int = 1) -> None: # Extend gap to the right self._gap_end += dist ``` + +### Moving the cursor + +Moving the cursor along the buffer will shift letters from one side of the gap +to the other, moving them across from prefix to suffix and back. + +I find Python's list slicing not quite as elegant to read as a `memmove`, though +it does make for a very small and efficient implementation.
+ +```python +def left(self, dist: int = 1) -> None: + assert dist <= self.prefix_length + # Shift the needed number of characters from end of prefix to start of suffix + self._buf[self._gap_end - dist : self._gap_end] = self._buf[ + self._gap_start - dist : self._gap_start + ] + # Adjust indices accordingly + self._gap_start -= dist + self._gap_end -= dist + +def right(self, dist: int = 1) -> None: + assert dist <= self.suffix_length + # Shift the needed number of characters from start of suffix to end of prefix + self._buf[self._gap_start : self._gap_start + dist] = self._buf[ + self._gap_end : self._gap_end + dist + ] + # Adjust indices accordingly + self._gap_start += dist + self._gap_end += dist +``` From 768acac4ae3e4f54ef6c59e38bcd690e6b96753c Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sat, 20 Jul 2024 19:21:08 +0100 Subject: [PATCH 065/107] posts: add treap --- content/posts/2024-07-20-treap/index.md | 29 + content/posts/2024-07-20-treap/treap.gv | 1004 +++++++++++++++++++++++ 2 files changed, 1033 insertions(+) create mode 100644 content/posts/2024-07-20-treap/index.md create mode 100644 content/posts/2024-07-20-treap/treap.gv diff --git a/content/posts/2024-07-20-treap/index.md b/content/posts/2024-07-20-treap/index.md new file mode 100644 index 0000000..edcda59 --- /dev/null +++ b/content/posts/2024-07-20-treap/index.md @@ -0,0 +1,29 @@ +--- +title: "Treap" +date: 2024-07-20T14:12:27+01:00 +draft: false # I don't care for draft mode, git has branches for that +description: "A simpler BST" +tags: + - algorithms + - data structures + - python +categories: + - programming +series: + - Cool algorithms +favorite: false +disable_feed: false +graphviz: true +--- + +The [_Treap_][wiki] is a mix between a _Binary Search Tree_ and a _Heap_. + +Like a _Binary Search Tree_, it keeps an ordered set of keys in the shape of a +tree, allowing for binary search traversal. 
+ +Like a _Heap_, it associates each node with a priority, making sure that a +parent's priority is always higher than any of its children. + +[wiki]: https://en.wikipedia.org/wiki/Treap + + diff --git a/content/posts/2024-07-20-treap/treap.gv b/content/posts/2024-07-20-treap/treap.gv new file mode 100644 index 0000000..156eff9 --- /dev/null +++ b/content/posts/2024-07-20-treap/treap.gv @@ -0,0 +1,1004 @@ +graph { + node [shape=circle] + 2[label=<2
0.9957869495270144>]; + 2 -- 1 [side=L] + 1[label=<1
0.8476937427586738>]; + 1 -- 0 [side=L] + 0[label=<0
0.7514570833207395>]; + 2 -- 145 [side=R] + 145[label=<145
0.9939888728473314>]; + 145 -- 12 [side=L] + 12[label=<12
0.9907319285760915>]; + 12 -- 5 [side=L] + 5[label=<5
0.988082370881665>]; + 5 -- 4 [side=L] + 4[label=<4
0.7422137887388395>]; + 4 -- 3 [side=L] + 3[label=<3
0.24440218394323976>]; + 5 -- 8 [side=R] + 8[label=<8
0.695547503791194>]; + 8 -- 6 [side=L] + 6[label=<6
0.5664414792316717>]; + 6 -- 7 [side=R] + 7[label=<7
0.11672897928969816>]; + 8 -- 11 [side=R] + 11[label=<11
0.45404068759973704>]; + 11 -- 9 [side=L] + 9[label=<9
0.3325186970255505>]; + 9 -- 10 [side=R] + 10[label=<10
0.10578109016044712>]; + 12 -- 47 [side=R] + 47[label=<47
0.9904839559513964>]; + 47 -- 33 [side=L] + 33[label=<33
0.9758188259688814>]; + 33 -- 28 [side=L] + 28[label=<28
0.915671870648212>]; + 28 -- 23 [side=L] + 23[label=<23
0.8487360320264957>]; + 23 -- 20 [side=L] + 20[label=<20
0.7063809958484802>]; + 20 -- 16 [side=L] + 16[label=<16
0.7034299359160114>]; + 16 -- 15 [side=L] + 15[label=<15
0.5922634040068163>]; + 15 -- 13 [side=L] + 13[label=<13
0.4543498778672278>]; + 13 -- 14 [side=R] + 14[label=<14
0.21605966187340298>]; + 16 -- 17 [side=R] + 17[label=<17
0.3709284358038115>]; + 17 -- 18 [side=R] + 18[label=<18
0.260599350868457>]; + 18 -- 19 [side=R] + 19[label=<19
0.1817886048339078>]; + 20 -- 21 [side=R] + 21[label=<21
0.14210053829804103>]; + 21 -- 22 [side=R] + 22[label=<22
0.08869917426783513>]; + 23 -- 25 [side=R] + 25[label=<25
0.4892716208875628>]; + 25 -- 24 [side=L] + 24[label=<24
0.25363061372658946>]; + 25 -- 26 [side=R] + 26[label=<26
0.13796660931463622>]; + 26 -- 27 [side=R] + 27[label=<27
0.12997273552181765>]; + 28 -- 31 [side=R] + 31[label=<31
0.8601319422972336>]; + 31 -- 29 [side=L] + 29[label=<29
0.8084698172467167>]; + 29 -- 30 [side=R] + 30[label=<30
0.27908621178937176>]; + 31 -- 32 [side=R] + 32[label=<32
0.0844421977476495>]; + 33 -- 35 [side=R] + 35[label=<35
0.967049096465691>]; + 35 -- 34 [side=L] + 34[label=<34
0.12377873295631248>]; + 35 -- 39 [side=R] + 39[label=<39
0.9328169604588316>]; + 39 -- 38 [side=L] + 38[label=<38
0.7787222122657034>]; + 38 -- 37 [side=L] + 37[label=<37
0.4043690842717128>]; + 37 -- 36 [side=L] + 36[label=<36
0.3046740248996985>]; + 39 -- 43 [side=R] + 43[label=<43
0.8350649185763811>]; + 43 -- 40 [side=L] + 40[label=<40
0.5406537578359348>]; + 40 -- 42 [side=R] + 42[label=<42
0.3193437618727414>]; + 42 -- 41 [side=L] + 41[label=<41
0.27788504363715394>]; + 43 -- 44 [side=R] + 44[label=<44
0.4830145820991081>]; + 44 -- 46 [side=R] + 46[label=<46
0.33052118143151343>]; + 46 -- 45 [side=L] + 45[label=<45
0.29551968300055065>]; + 47 -- 114 [side=R] + 114[label=<114
0.9876511078116038>]; + 114 -- 86 [side=L] + 86[label=<86
0.9735379804960496>]; + 86 -- 77 [side=L] + 77[label=<77
0.9269342780771458>]; + 77 -- 68 [side=L] + 68[label=<68
0.9230582358699485>]; + 68 -- 55 [side=L] + 55[label=<55
0.839490204679298>]; + 55 -- 51 [side=L] + 51[label=<51
0.3385715557270519>]; + 51 -- 50 [side=L] + 50[label=<50
0.3360608605201836>]; + 50 -- 49 [side=L] + 49[label=<49
0.2774445142713422>]; + 49 -- 48 [side=L] + 48[label=<48
0.17489390663513982>]; + 51 -- 52 [side=R] + 52[label=<52
0.1886079446757799>]; + 52 -- 54 [side=R] + 54[label=<54
0.12293678964136756>]; + 54 -- 53 [side=L] + 53[label=<53
0.08266549169366844>]; + 55 -- 64 [side=R] + 64[label=<64
0.5480541758673416>]; + 64 -- 61 [side=L] + 61[label=<61
0.4333510895474586>]; + 61 -- 58 [side=L] + 58[label=<58
0.22965679673111572>]; + 58 -- 57 [side=L] + 57[label=<57
0.1750392554452569>]; + 57 -- 56 [side=L] + 56[label=<56
0.1154682181055644>]; + 58 -- 60 [side=R] + 60[label=<60
0.19505416228675265>]; + 60 -- 59 [side=L] + 59[label=<59
0.15628574332026035>]; + 61 -- 63 [side=R] + 63[label=<63
0.23625159275800722>]; + 63 -- 62 [side=L] + 62[label=<62
0.10778840213677576>]; + 64 -- 66 [side=R] + 66[label=<66
0.4795221664420316>]; + 66 -- 65 [side=L] + 65[label=<65
0.29689544767487486>]; + 66 -- 67 [side=R] + 67[label=<67
0.1446107813011439>]; + 68 -- 75 [side=R] + 75[label=<75
0.9136368186744673>]; + 75 -- 73 [side=L] + 73[label=<73
0.8180762618516633>]; + 73 -- 72 [side=L] + 72[label=<72
0.790052528538962>]; + 72 -- 69 [side=L] + 69[label=<69
0.6662534331501744>]; + 69 -- 70 [side=R] + 70[label=<70
0.198170103856931>]; + 70 -- 71 [side=R] + 71[label=<71
0.09798170568469355>]; + 73 -- 74 [side=R] + 74[label=<74
0.3113326781632004>]; + 75 -- 76 [side=R] + 76[label=<76
0.878956270284837>]; + 77 -- 84 [side=R] + 84[label=<84
0.7298827990732385>]; + 84 -- 81 [side=L] + 81[label=<81
0.7294243899593819>]; + 81 -- 78 [side=L] + 78[label=<78
0.6751780813023192>]; + 78 -- 80 [side=R] + 80[label=<80
0.2765792284019555>]; + 80 -- 79 [side=L] + 79[label=<79
0.12189576635143229>]; + 81 -- 83 [side=R] + 83[label=<83
0.5986915452921237>]; + 83 -- 82 [side=L] + 82[label=<82
0.5846092825823913>]; + 84 -- 85 [side=R] + 85[label=<85
0.5030186847252209>]; + 86 -- 103 [side=R] + 103[label=<103
0.968253245483458>]; + 103 -- 88 [side=L] + 88[label=<88
0.9609670084738324>]; + 88 -- 87 [side=L] + 87[label=<87
0.5446892558733331>]; + 88 -- 97 [side=R] + 97[label=<97
0.9483161815322799>]; + 97 -- 92 [side=L] + 92[label=<92
0.9417148997777522>]; + 92 -- 89 [side=L] + 89[label=<89
0.2455511319571373>]; + 89 -- 90 [side=R] + 90[label=<90
0.15647633602612276>]; + 90 -- 91 [side=R] + 91[label=<91
0.13997459061178652>]; + 92 -- 93 [side=R] + 93[label=<93
0.7804508194274744>]; + 93 -- 95 [side=R] + 95[label=<95
0.7226672256819942>]; + 95 -- 94 [side=L] + 94[label=<94
0.43503881905350683>]; + 95 -- 96 [side=R] + 96[label=<96
0.40712673636645136>]; + 97 -- 101 [side=R] + 101[label=<101
0.9255359833317219>]; + 101 -- 99 [side=L] + 99[label=<99
0.7139348283525223>]; + 99 -- 98 [side=L] + 98[label=<98
0.21279150783985867>]; + 99 -- 100 [side=R] + 100[label=<100
0.48002453571456083>]; + 101 -- 102 [side=R] + 102[label=<102
0.6219674393805691>]; + 103 -- 106 [side=R] + 106[label=<106
0.8731379458552129>]; + 106 -- 104 [side=L] + 104[label=<104
0.5400277589431796>]; + 104 -- 105 [side=R] + 105[label=<105
0.0007891566377928871>]; + 106 -- 110 [side=R] + 110[label=<110
0.8382929650437165>]; + 110 -- 109 [side=L] + 109[label=<109
0.4615149406437542>]; + 109 -- 107 [side=L] + 107[label=<107
0.43337904959817486>]; + 107 -- 108 [side=R] + 108[label=<108
0.053658620013432023>]; + 110 -- 112 [side=R] + 112[label=<112
0.5679810533328041>]; + 112 -- 111 [side=L] + 111[label=<111
0.541018763444613>]; + 112 -- 113 [side=R] + 113[label=<113
0.42841646768139463>]; + 114 -- 123 [side=R] + 123[label=<123
0.9464643826259924>]; + 123 -- 119 [side=L] + 119[label=<119
0.8662877311047388>]; + 119 -- 116 [side=L] + 116[label=<116
0.7670170824076343>]; + 116 -- 115 [side=L] + 115[label=<115
0.17190031141596263>]; + 116 -- 118 [side=R] + 118[label=<118
0.3671674221014496>]; + 118 -- 117 [side=L] + 117[label=<117
0.2159590427591035>]; + 119 -- 121 [side=R] + 121[label=<121
0.74738638353363>]; + 121 -- 120 [side=L] + 120[label=<120
0.08665916013292596>]; + 121 -- 122 [side=R] + 122[label=<122
0.4119543486009871>]; + 123 -- 143 [side=R] + 143[label=<143
0.9247442715890171>]; + 143 -- 140 [side=L] + 140[label=<140
0.8936337545064225>]; + 140 -- 125 [side=L] + 125[label=<125
0.8517904638602968>]; + 125 -- 124 [side=L] + 124[label=<124
0.681815353261336>]; + 125 -- 126 [side=R] + 126[label=<126
0.7946927108945189>]; + 126 -- 133 [side=R] + 133[label=<133
0.7289879153749418>]; + 133 -- 128 [side=L] + 128[label=<128
0.7256302169867033>]; + 128 -- 127 [side=L] + 127[label=<127
0.6200343851508556>]; + 128 -- 131 [side=R] + 131[label=<131
0.7089639682237622>]; + 131 -- 129 [side=L] + 129[label=<129
0.5255088944172916>]; + 129 -- 130 [side=R] + 130[label=<130
0.41550021924797953>]; + 131 -- 132 [side=R] + 132[label=<132
0.1851371011016204>]; + 133 -- 134 [side=R] + 134[label=<134
0.6333388625482715>]; + 134 -- 135 [side=R] + 135[label=<135
0.4707182839428111>]; + 135 -- 138 [side=R] + 138[label=<138
0.4128350639522185>]; + 138 -- 137 [side=L] + 137[label=<137
0.25475301393491523>]; + 137 -- 136 [side=L] + 136[label=<136
0.04841095612377411>]; + 138 -- 139 [side=R] + 139[label=<139
0.3855980374729622>]; + 140 -- 141 [side=R] + 141[label=<141
0.8655582249610275>]; + 141 -- 142 [side=R] + 142[label=<142
0.2288591915235536>]; + 143 -- 144 [side=R] + 144[label=<144
0.36710686055015185>]; + 145 -- 235 [side=R] + 235[label=<235
0.9812914040931304>]; + 235 -- 169 [side=L] + 169[label=<169
0.9782585107854103>]; + 169 -- 148 [side=L] + 148[label=<148
0.9449023406787344>]; + 148 -- 147 [side=L] + 147[label=<147
0.7293740098286456>]; + 147 -- 146 [side=L] + 146[label=<146
0.509027051210447>]; + 148 -- 150 [side=R] + 150[label=<150
0.9020436951014624>]; + 150 -- 149 [side=L] + 149[label=<149
0.8190200516813236>]; + 150 -- 153 [side=R] + 153[label=<153
0.8855777631108436>]; + 153 -- 152 [side=L] + 152[label=<152
0.8603376027563862>]; + 152 -- 151 [side=L] + 151[label=<151
0.45459695989529647>]; + 153 -- 167 [side=R] + 167[label=<167
0.8492899523613442>]; + 167 -- 163 [side=L] + 163[label=<163
0.8177575678069656>]; + 163 -- 160 [side=L] + 160[label=<160
0.7759852430212377>]; + 160 -- 158 [side=L] + 158[label=<158
0.734924251943539>]; + 158 -- 156 [side=L] + 156[label=<156
0.710110660084087>]; + 156 -- 155 [side=L] + 155[label=<155
0.2959097323390557>]; + 155 -- 154 [side=L] + 154[label=<154
0.23060113716089548>]; + 156 -- 157 [side=R] + 157[label=<157
0.04398748527099361>]; + 158 -- 159 [side=R] + 159[label=<159
0.09047000528918914>]; + 160 -- 162 [side=R] + 162[label=<162
0.7216878041963218>]; + 162 -- 161 [side=L] + 161[label=<161
0.6871202472393689>]; + 163 -- 165 [side=R] + 165[label=<165
0.3886140063494328>]; + 165 -- 164 [side=L] + 164[label=<164
0.3870484684485387>]; + 165 -- 166 [side=R] + 166[label=<166
0.3476475192762013>]; + 167 -- 168 [side=R] + 168[label=<168
0.8169397159175148>]; + 169 -- 172 [side=R] + 172[label=<172
0.9747928133504129>]; + 172 -- 170 [side=L] + 170[label=<170
0.846452976104617>]; + 170 -- 171 [side=R] + 171[label=<171
0.3121690933042578>]; + 172 -- 230 [side=R] + 230[label=<230
0.958355558453681>]; + 230 -- 187 [side=L] + 187[label=<187
0.957557676245447>]; + 187 -- 180 [side=L] + 180[label=<180
0.92710438449752>]; + 180 -- 174 [side=L] + 174[label=<174
0.7049973940003331>]; + 174 -- 173 [side=L] + 173[label=<173
0.152023809588824>]; + 174 -- 179 [side=R] + 179[label=<179
0.6715227532708304>]; + 179 -- 176 [side=L] + 176[label=<176
0.6621862385446002>]; + 176 -- 175 [side=L] + 175[label=<175
0.1248086920689816>]; + 176 -- 178 [side=R] + 178[label=<178
0.5713418767827186>]; + 178 -- 177 [side=L] + 177[label=<177
0.5560171098718876>]; + 180 -- 183 [side=R] + 183[label=<183
0.9250644230760526>]; + 183 -- 182 [side=L] + 182[label=<182
0.6824354777265715>]; + 182 -- 181 [side=L] + 181[label=<181
0.012508171868127804>]; + 183 -- 184 [side=R] + 184[label=<184
0.4823271793225655>]; + 184 -- 185 [side=R] + 185[label=<185
0.3841022310929997>]; + 185 -- 186 [side=R] + 186[label=<186
0.18775042542247533>]; + 187 -- 203 [side=R] + 203[label=<203
0.8944186382789656>]; + 203 -- 194 [side=L] + 194[label=<194
0.878541152511799>]; + 194 -- 190 [side=L] + 190[label=<190
0.6762694909925744>]; + 190 -- 188 [side=L] + 188[label=<188
0.3829541869540283>]; + 188 -- 189 [side=R] + 189[label=<189
0.20943675668859407>]; + 190 -- 192 [side=R] + 192[label=<192
0.5817469625147841>]; + 192 -- 191 [side=L] + 191[label=<191
0.09503744303386963>]; + 192 -- 193 [side=R] + 193[label=<193
0.3644771285875493>]; + 194 -- 197 [side=R] + 197[label=<197
0.7948407869124952>]; + 197 -- 195 [side=L] + 195[label=<195
0.7942811285510138>]; + 195 -- 196 [side=R] + 196[label=<196
0.5627582929517205>]; + 197 -- 202 [side=R] + 202[label=<202
0.6971384514391066>]; + 202 -- 198 [side=L] + 198[label=<198
0.6927264474372884>]; + 198 -- 200 [side=R] + 200[label=<200
0.6455091049085492>]; + 200 -- 199 [side=L] + 199[label=<199
0.5952913724552451>]; + 200 -- 201 [side=R] + 201[label=<201
0.02134082793304981>]; + 203 -- 222 [side=R] + 222[label=<222
0.7905661844403327>]; + 222 -- 221 [side=L] + 221[label=<221
0.7344461814842598>]; + 221 -- 218 [side=L] + 218[label=<218
0.710158738001777>]; + 218 -- 214 [side=L] + 214[label=<214
0.6681641416928006>]; + 214 -- 210 [side=L] + 210[label=<210
0.6445674840025054>]; + 210 -- 204 [side=L] + 204[label=<204
0.4613335241960753>]; + 204 -- 207 [side=R] + 207[label=<207
0.4534069998012402>]; + 207 -- 205 [side=L] + 205[label=<205
0.3958801765835097>]; + 205 -- 206 [side=R] + 206[label=<206
0.11317604266906478>]; + 207 -- 208 [side=R] + 208[label=<208
0.15215468537761012>]; + 208 -- 209 [side=R] + 209[label=<209
0.1517155863611125>]; + 210 -- 213 [side=R] + 213[label=<213
0.5254528847170963>]; + 213 -- 212 [side=L] + 212[label=<212
0.3831362875816451>]; + 212 -- 211 [side=L] + 211[label=<211
0.05056020282973139>]; + 214 -- 215 [side=R] + 215[label=<215
0.5009367015686823>]; + 215 -- 216 [side=R] + 216[label=<216
0.10901874828554337>]; + 216 -- 217 [side=R] + 217[label=<217
0.05646524838685241>]; + 218 -- 220 [side=R] + 220[label=<220
0.674411240175005>]; + 220 -- 219 [side=L] + 219[label=<219
0.14299197003272757>]; + 222 -- 223 [side=R] + 223[label=<223
0.6736901076540927>]; + 223 -- 226 [side=R] + 226[label=<226
0.6137254504515176>]; + 226 -- 225 [side=L] + 225[label=<225
0.42057192814517086>]; + 225 -- 224 [side=L] + 224[label=<224
0.15459807255670532>]; + 226 -- 227 [side=R] + 227[label=<227
0.3240860179195383>]; + 227 -- 229 [side=R] + 229[label=<229
0.1917962317572811>]; + 229 -- 228 [side=L] + 228[label=<228
0.09544487292662296>]; + 230 -- 231 [side=R] + 231[label=<231
0.9537855788342208>]; + 231 -- 234 [side=R] + 234[label=<234
0.8201857812416328>]; + 234 -- 233 [side=L] + 233[label=<233
0.38950247492509926>]; + 233 -- 232 [side=L] + 232[label=<232
0.3117950672088875>]; + 235 -- 245 [side=R] + 245[label=<245
0.9606326065278836>]; + 245 -- 243 [side=L] + 243[label=<243
0.9010055124673463>]; + 243 -- 237 [side=L] + 237[label=<237
0.8906258722253291>]; + 237 -- 236 [side=L] + 236[label=<236
0.699210568923407>]; + 237 -- 239 [side=R] + 239[label=<239
0.8460173468831691>]; + 239 -- 238 [side=L] + 238[label=<238
0.11026801706799916>]; + 239 -- 242 [side=R] + 242[label=<242
0.8439586286718704>]; + 242 -- 240 [side=L] + 240[label=<240
0.6602788030770246>]; + 240 -- 241 [side=R] + 241[label=<241
0.5119345141518413>]; + 243 -- 244 [side=R] + 244[label=<244
0.7247536554274525>]; + 245 -- 246 [side=R] + 246[label=<246
0.5721754588545217>]; + 246 -- 247 [side=R] + 247[label=<247
0.5181752052426042>]; + 247 -- 249 [side=R] + 249[label=<249
0.07619775425067954>]; + 249 -- 248 [side=L] + 248[label=<248
0.03123446276525388>]; +} +graph { + node [shape=circle] + 2[label=<2
0.9957869495270144>]; + 2 -- 1 [side=L] + 1[label=<1
0.8476937427586738>]; + 1 -- 0 [side=L] + 0[label=<0
0.7514570833207395>]; + 2 -- 145 [side=R] + 145[label=<145
0.9939888728473314>]; + 145 -- 12 [side=L] + 12[label=<12
0.9907319285760915>]; + 12 -- 5 [side=L] + 5[label=<5
0.988082370881665>]; + 5 -- 4 [side=L] + 4[label=<4
0.7422137887388395>]; + 4 -- 3 [side=L] + 3[label=<3
0.24440218394323976>]; + 5 -- 8 [side=R] + 8[label=<8
0.695547503791194>]; + 8 -- 6 [side=L] + 6[label=<6
0.5664414792316717>]; + 6 -- 7 [side=R] + 7[label=<7
0.11672897928969816>]; + 8 -- 11 [side=R] + 11[label=<11
0.45404068759973704>]; + 11 -- 9 [side=L] + 9[label=<9
0.3325186970255505>]; + 9 -- 10 [side=R] + 10[label=<10
0.10578109016044712>]; + 12 -- 47 [side=R] + 47[label=<47
0.9904839559513964>]; + 47 -- 33 [side=L] + 33[label=<33
0.9758188259688814>]; + 33 -- 28 [side=L] + 28[label=<28
0.915671870648212>]; + 28 -- 23 [side=L] + 23[label=<23
0.8487360320264957>]; + 23 -- 20 [side=L] + 20[label=<20
0.7063809958484802>]; + 20 -- 16 [side=L] + 16[label=<16
0.7034299359160114>]; + 16 -- 15 [side=L] + 15[label=<15
0.5922634040068163>]; + 15 -- 13 [side=L] + 13[label=<13
0.4543498778672278>]; + 13 -- 14 [side=R] + 14[label=<14
0.21605966187340298>]; + 16 -- 17 [side=R] + 17[label=<17
0.3709284358038115>]; + 17 -- 18 [side=R] + 18[label=<18
0.260599350868457>]; + 18 -- 19 [side=R] + 19[label=<19
0.1817886048339078>]; + 20 -- 21 [side=R] + 21[label=<21
0.14210053829804103>]; + 21 -- 22 [side=R] + 22[label=<22
0.08869917426783513>]; + 23 -- 25 [side=R] + 25[label=<25
0.4892716208875628>]; + 25 -- 24 [side=L] + 24[label=<24
0.25363061372658946>]; + 25 -- 26 [side=R] + 26[label=<26
0.13796660931463622>]; + 26 -- 27 [side=R] + 27[label=<27
0.12997273552181765>]; + 28 -- 31 [side=R] + 31[label=<31
0.8601319422972336>]; + 31 -- 29 [side=L] + 29[label=<29
0.8084698172467167>]; + 29 -- 30 [side=R] + 30[label=<30
0.27908621178937176>]; + 31 -- 32 [side=R] + 32[label=<32
0.0844421977476495>]; + 33 -- 35 [side=R] + 35[label=<35
0.967049096465691>]; + 35 -- 34 [side=L] + 34[label=<34
0.12377873295631248>]; + 35 -- 39 [side=R] + 39[label=<39
0.9328169604588316>]; + 39 -- 38 [side=L] + 38[label=<38
0.7787222122657034>]; + 38 -- 37 [side=L] + 37[label=<37
0.4043690842717128>]; + 37 -- 36 [side=L] + 36[label=<36
0.3046740248996985>]; + 39 -- 43 [side=R] + 43[label=<43
0.8350649185763811>]; + 43 -- 40 [side=L] + 40[label=<40
0.5406537578359348>]; + 40 -- 42 [side=R] + 42[label=<42
0.3193437618727414>]; + 42 -- 41 [side=L] + 41[label=<41
0.27788504363715394>]; + 43 -- 44 [side=R] + 44[label=<44
0.4830145820991081>]; + 44 -- 46 [side=R] + 46[label=<46
0.33052118143151343>]; + 46 -- 45 [side=L] + 45[label=<45
0.29551968300055065>]; + 47 -- 114 [side=R] + 114[label=<114
0.9876511078116038>]; + 114 -- 86 [side=L] + 86[label=<86
0.9735379804960496>]; + 86 -- 77 [side=L] + 77[label=<77
0.9269342780771458>]; + 77 -- 68 [side=L] + 68[label=<68
0.9230582358699485>]; + 68 -- 55 [side=L] + 55[label=<55
0.839490204679298>]; + 55 -- 51 [side=L] + 51[label=<51
0.3385715557270519>]; + 51 -- 50 [side=L] + 50[label=<50
0.3360608605201836>]; + 50 -- 49 [side=L] + 49[label=<49
0.2774445142713422>]; + 49 -- 48 [side=L] + 48[label=<48
0.17489390663513982>]; + 51 -- 52 [side=R] + 52[label=<52
0.1886079446757799>]; + 52 -- 54 [side=R] + 54[label=<54
0.12293678964136756>]; + 54 -- 53 [side=L] + 53[label=<53
0.08266549169366844>]; + 55 -- 64 [side=R] + 64[label=<64
0.5480541758673416>]; + 64 -- 61 [side=L] + 61[label=<61
0.4333510895474586>]; + 61 -- 58 [side=L] + 58[label=<58
0.22965679673111572>]; + 58 -- 57 [side=L] + 57[label=<57
0.1750392554452569>]; + 57 -- 56 [side=L] + 56[label=<56
0.1154682181055644>]; + 58 -- 60 [side=R] + 60[label=<60
0.19505416228675265>]; + 60 -- 59 [side=L] + 59[label=<59
0.15628574332026035>]; + 61 -- 63 [side=R] + 63[label=<63
0.23625159275800722>]; + 63 -- 62 [side=L] + 62[label=<62
0.10778840213677576>]; + 64 -- 66 [side=R] + 66[label=<66
0.4795221664420316>]; + 66 -- 65 [side=L] + 65[label=<65
0.29689544767487486>]; + 66 -- 67 [side=R] + 67[label=<67
0.1446107813011439>]; + 68 -- 75 [side=R] + 75[label=<75
0.9136368186744673>]; + 75 -- 73 [side=L] + 73[label=<73
0.8180762618516633>]; + 73 -- 72 [side=L] + 72[label=<72
0.790052528538962>]; + 72 -- 69 [side=L] + 69[label=<69
0.6662534331501744>]; + 69 -- 70 [side=R] + 70[label=<70
0.198170103856931>]; + 70 -- 71 [side=R] + 71[label=<71
0.09798170568469355>]; + 73 -- 74 [side=R] + 74[label=<74
0.3113326781632004>]; + 75 -- 76 [side=R] + 76[label=<76
0.878956270284837>]; + 77 -- 84 [side=R] + 84[label=<84
0.7298827990732385>]; + 84 -- 81 [side=L] + 81[label=<81
0.7294243899593819>]; + 81 -- 78 [side=L] + 78[label=<78
0.6751780813023192>]; + 78 -- 80 [side=R] + 80[label=<80
0.2765792284019555>]; + 80 -- 79 [side=L] + 79[label=<79
0.12189576635143229>]; + 81 -- 83 [side=R] + 83[label=<83
0.5986915452921237>]; + 83 -- 82 [side=L] + 82[label=<82
0.5846092825823913>]; + 84 -- 85 [side=R] + 85[label=<85
0.5030186847252209>]; + 86 -- 103 [side=R] + 103[label=<103
0.968253245483458>]; + 103 -- 88 [side=L] + 88[label=<88
0.9609670084738324>]; + 88 -- 87 [side=L] + 87[label=<87
0.5446892558733331>]; + 88 -- 97 [side=R] + 97[label=<97
0.9483161815322799>]; + 97 -- 92 [side=L] + 92[label=<92
0.9417148997777522>]; + 92 -- 89 [side=L] + 89[label=<89
0.2455511319571373>]; + 89 -- 90 [side=R] + 90[label=<90
0.15647633602612276>]; + 90 -- 91 [side=R] + 91[label=<91
0.13997459061178652>]; + 92 -- 93 [side=R] + 93[label=<93
0.7804508194274744>]; + 93 -- 95 [side=R] + 95[label=<95
0.7226672256819942>]; + 95 -- 94 [side=L] + 94[label=<94
0.43503881905350683>]; + 95 -- 96 [side=R] + 96[label=<96
0.40712673636645136>]; + 97 -- 101 [side=R] + 101[label=<101
0.9255359833317219>]; + 101 -- 99 [side=L] + 99[label=<99
0.7139348283525223>]; + 99 -- 98 [side=L] + 98[label=<98
0.21279150783985867>]; + 99 -- 100 [side=R] + 100[label=<100
0.48002453571456083>]; + 101 -- 102 [side=R] + 102[label=<102
0.6219674393805691>]; + 103 -- 106 [side=R] + 106[label=<106
0.8731379458552129>]; + 106 -- 104 [side=L] + 104[label=<104
0.5400277589431796>]; + 104 -- 105 [side=R] + 105[label=<105
0.0007891566377928871>]; + 106 -- 110 [side=R] + 110[label=<110
0.8382929650437165>]; + 110 -- 109 [side=L] + 109[label=<109
0.4615149406437542>]; + 109 -- 107 [side=L] + 107[label=<107
0.43337904959817486>]; + 107 -- 108 [side=R] + 108[label=<108
0.053658620013432023>]; + 110 -- 112 [side=R] + 112[label=<112
0.5679810533328041>]; + 112 -- 111 [side=L] + 111[label=<111
0.541018763444613>]; + 112 -- 113 [side=R] + 113[label=<113
0.42841646768139463>]; + 114 -- 123 [side=R] + 123[label=<123
0.9464643826259924>]; + 123 -- 119 [side=L] + 119[label=<119
0.8662877311047388>]; + 119 -- 116 [side=L] + 116[label=<116
0.7670170824076343>]; + 116 -- 115 [side=L] + 115[label=<115
0.17190031141596263>]; + 116 -- 118 [side=R] + 118[label=<118
0.3671674221014496>]; + 118 -- 117 [side=L] + 117[label=<117
0.2159590427591035>]; + 119 -- 121 [side=R] + 121[label=<121
0.74738638353363>]; + 121 -- 120 [side=L] + 120[label=<120
0.08665916013292596>]; + 121 -- 122 [side=R] + 122[label=<122
0.4119543486009871>]; + 123 -- 143 [side=R] + 143[label=<143
0.9247442715890171>]; + 143 -- 140 [side=L] + 140[label=<140
0.8936337545064225>]; + 140 -- 125 [side=L] + 125[label=<125
0.8517904638602968>]; + 125 -- 124 [side=L] + 124[label=<124
0.681815353261336>]; + 125 -- 126 [side=R] + 126[label=<126
0.7946927108945189>]; + 126 -- 133 [side=R] + 133[label=<133
0.7289879153749418>]; + 133 -- 128 [side=L] + 128[label=<128
0.7256302169867033>]; + 128 -- 127 [side=L] + 127[label=<127
0.6200343851508556>]; + 128 -- 131 [side=R] + 131[label=<131
0.7089639682237622>]; + 131 -- 129 [side=L] + 129[label=<129
0.5255088944172916>]; + 129 -- 130 [side=R] + 130[label=<130
0.41550021924797953>]; + 131 -- 132 [side=R] + 132[label=<132
0.1851371011016204>]; + 133 -- 134 [side=R] + 134[label=<134
0.6333388625482715>]; + 134 -- 135 [side=R] + 135[label=<135
0.4707182839428111>]; + 135 -- 138 [side=R] + 138[label=<138
0.4128350639522185>]; + 138 -- 137 [side=L] + 137[label=<137
0.25475301393491523>]; + 137 -- 136 [side=L] + 136[label=<136
0.04841095612377411>]; + 138 -- 139 [side=R] + 139[label=<139
0.3855980374729622>]; + 140 -- 141 [side=R] + 141[label=<141
0.8655582249610275>]; + 141 -- 142 [side=R] + 142[label=<142
0.2288591915235536>]; + 143 -- 144 [side=R] + 144[label=<144
0.36710686055015185>]; + 145 -- 235 [side=R] + 235[label=<235
0.9812914040931304>]; + 235 -- 169 [side=L] + 169[label=<169
0.9782585107854103>]; + 169 -- 148 [side=L] + 148[label=<148
0.9449023406787344>]; + 148 -- 147 [side=L] + 147[label=<147
0.7293740098286456>]; + 147 -- 146 [side=L] + 146[label=<146
0.509027051210447>]; + 148 -- 150 [side=R] + 150[label=<150
0.9020436951014624>]; + 150 -- 149 [side=L] + 149[label=<149
0.8190200516813236>]; + 150 -- 153 [side=R] + 153[label=<153
0.8855777631108436>]; + 153 -- 152 [side=L] + 152[label=<152
0.8603376027563862>]; + 152 -- 151 [side=L] + 151[label=<151
0.45459695989529647>]; + 153 -- 167 [side=R] + 167[label=<167
0.8492899523613442>]; + 167 -- 163 [side=L] + 163[label=<163
0.8177575678069656>]; + 163 -- 160 [side=L] + 160[label=<160
0.7759852430212377>]; + 160 -- 158 [side=L] + 158[label=<158
0.734924251943539>]; + 158 -- 156 [side=L] + 156[label=<156
0.710110660084087>]; + 156 -- 155 [side=L] + 155[label=<155
0.2959097323390557>]; + 155 -- 154 [side=L] + 154[label=<154
0.23060113716089548>]; + 156 -- 157 [side=R] + 157[label=<157
0.04398748527099361>]; + 158 -- 159 [side=R] + 159[label=<159
0.09047000528918914>]; + 160 -- 162 [side=R] + 162[label=<162
0.7216878041963218>]; + 162 -- 161 [side=L] + 161[label=<161
0.6871202472393689>]; + 163 -- 165 [side=R] + 165[label=<165
0.3886140063494328>]; + 165 -- 164 [side=L] + 164[label=<164
0.3870484684485387>]; + 165 -- 166 [side=R] + 166[label=<166
0.3476475192762013>]; + 167 -- 168 [side=R] + 168[label=<168
0.8169397159175148>]; + 169 -- 172 [side=R] + 172[label=<172
0.9747928133504129>]; + 172 -- 170 [side=L] + 170[label=<170
0.846452976104617>]; + 170 -- 171 [side=R] + 171[label=<171
0.3121690933042578>]; + 172 -- 230 [side=R] + 230[label=<230
0.958355558453681>]; + 230 -- 187 [side=L] + 187[label=<187
0.957557676245447>]; + 187 -- 180 [side=L] + 180[label=<180
0.92710438449752>]; + 180 -- 174 [side=L] + 174[label=<174
0.7049973940003331>]; + 174 -- 173 [side=L] + 173[label=<173
0.152023809588824>]; + 174 -- 179 [side=R] + 179[label=<179
0.6715227532708304>]; + 179 -- 176 [side=L] + 176[label=<176
0.6621862385446002>]; + 176 -- 175 [side=L] + 175[label=<175
0.1248086920689816>]; + 176 -- 178 [side=R] + 178[label=<178
0.5713418767827186>]; + 178 -- 177 [side=L] + 177[label=<177
0.5560171098718876>]; + 180 -- 183 [side=R] + 183[label=<183
0.9250644230760526>]; + 183 -- 182 [side=L] + 182[label=<182
0.6824354777265715>]; + 182 -- 181 [side=L] + 181[label=<181
0.012508171868127804>]; + 183 -- 184 [side=R] + 184[label=<184
0.4823271793225655>]; + 184 -- 185 [side=R] + 185[label=<185
0.3841022310929997>]; + 185 -- 186 [side=R] + 186[label=<186
0.18775042542247533>]; + 187 -- 203 [side=R] + 203[label=<203
0.8944186382789656>]; + 203 -- 194 [side=L] + 194[label=<194
0.878541152511799>]; + 194 -- 190 [side=L] + 190[label=<190
0.6762694909925744>]; + 190 -- 188 [side=L] + 188[label=<188
0.3829541869540283>]; + 188 -- 189 [side=R] + 189[label=<189
0.20943675668859407>]; + 190 -- 192 [side=R] + 192[label=<192
0.5817469625147841>]; + 192 -- 191 [side=L] + 191[label=<191
0.09503744303386963>]; + 192 -- 193 [side=R] + 193[label=<193
0.3644771285875493>]; + 194 -- 197 [side=R] + 197[label=<197
0.7948407869124952>]; + 197 -- 195 [side=L] + 195[label=<195
0.7942811285510138>]; + 195 -- 196 [side=R] + 196[label=<196
0.5627582929517205>]; + 197 -- 202 [side=R] + 202[label=<202
0.6971384514391066>]; + 202 -- 198 [side=L] + 198[label=<198
0.6927264474372884>]; + 198 -- 200 [side=R] + 200[label=<200
0.6455091049085492>]; + 200 -- 199 [side=L] + 199[label=<199
0.5952913724552451>]; + 200 -- 201 [side=R] + 201[label=<201
0.02134082793304981>]; + 203 -- 222 [side=R] + 222[label=<222
0.7905661844403327>]; + 222 -- 221 [side=L] + 221[label=<221
0.7344461814842598>]; + 221 -- 218 [side=L] + 218[label=<218
0.710158738001777>]; + 218 -- 214 [side=L] + 214[label=<214
0.6681641416928006>]; + 214 -- 210 [side=L] + 210[label=<210
0.6445674840025054>]; + 210 -- 204 [side=L] + 204[label=<204
0.4613335241960753>]; + 204 -- 207 [side=R] + 207[label=<207
0.4534069998012402>]; + 207 -- 205 [side=L] + 205[label=<205
0.3958801765835097>]; + 205 -- 206 [side=R] + 206[label=<206
0.11317604266906478>]; + 207 -- 208 [side=R] + 208[label=<208
0.15215468537761012>]; + 208 -- 209 [side=R] + 209[label=<209
0.1517155863611125>]; + 210 -- 213 [side=R] + 213[label=<213
0.5254528847170963>]; + 213 -- 212 [side=L] + 212[label=<212
0.3831362875816451>]; + 212 -- 211 [side=L] + 211[label=<211
0.05056020282973139>]; + 214 -- 215 [side=R] + 215[label=<215
0.5009367015686823>]; + 215 -- 216 [side=R] + 216[label=<216
0.10901874828554337>]; + 216 -- 217 [side=R] + 217[label=<217
0.05646524838685241>]; + 218 -- 220 [side=R] + 220[label=<220
0.674411240175005>]; + 220 -- 219 [side=L] + 219[label=<219
0.14299197003272757>]; + 222 -- 223 [side=R] + 223[label=<223
0.6736901076540927>]; + 223 -- 226 [side=R] + 226[label=<226
0.6137254504515176>]; + 226 -- 225 [side=L] + 225[label=<225
0.42057192814517086>]; + 225 -- 224 [side=L] + 224[label=<224
0.15459807255670532>]; + 226 -- 227 [side=R] + 227[label=<227
0.3240860179195383>]; + 227 -- 229 [side=R] + 229[label=<229
0.1917962317572811>]; + 229 -- 228 [side=L] + 228[label=<228
0.09544487292662296>]; + 230 -- 231 [side=R] + 231[label=<231
0.9537855788342208>]; + 231 -- 234 [side=R] + 234[label=<234
0.8201857812416328>]; + 234 -- 233 [side=L] + 233[label=<233
0.38950247492509926>]; + 233 -- 232 [side=L] + 232[label=<232
0.3117950672088875>]; + 235 -- 245 [side=R] + 245[label=<245
0.9606326065278836>]; + 245 -- 243 [side=L] + 243[label=<243
0.9010055124673463>]; + 243 -- 237 [side=L] + 237[label=<237
0.8906258722253291>]; + 237 -- 236 [side=L] + 236[label=<236
0.699210568923407>]; + 237 -- 239 [side=R] + 239[label=<239
0.8460173468831691>]; + 239 -- 238 [side=L] + 238[label=<238
0.11026801706799916>]; + 239 -- 242 [side=R] + 242[label=<242
0.8439586286718704>]; + 242 -- 240 [side=L] + 240[label=<240
0.6602788030770246>]; + 240 -- 241 [side=R] + 241[label=<241
0.5119345141518413>]; + 243 -- 244 [side=R] + 244[label=<244
0.7247536554274525>]; + 245 -- 246 [side=R] + 246[label=<246
0.5721754588545217>]; + 246 -- 247 [side=R] + 247[label=<247
0.5181752052426042>]; + 247 -- 249 [side=R] + 249[label=<249
0.07619775425067954>]; + 249 -- 248 [side=L] + 248[label=<248
0.03123446276525388>]; +} From c97d83d88382b7f845a8d00b9892d285b1db1152 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sun, 14 Jul 2024 17:56:33 +0100 Subject: [PATCH 066/107] posts: bloom-filter: add lookup --- content/posts/2024-07-14-bloom-filter/index.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/content/posts/2024-07-14-bloom-filter/index.md b/content/posts/2024-07-14-bloom-filter/index.md index 7c02140..86aca41 100644 --- a/content/posts/2024-07-14-bloom-filter/index.md +++ b/content/posts/2024-07-14-bloom-filter/index.md @@ -77,3 +77,21 @@ def insert(self, val: T) -> None: # Set the corresponding bit self._bit |= 1 << n ``` + +### Querying a key + +Because the _Bloom Filter_ does not actually store its elements, but some +derived data from hashing them, it can only definitely say if an element _does +not_ belong to it. Otherwise, it _may_ be part of the set, and should be checked +against the actual underlying store. + +```python +def may_contain(self, val: T) -> bool: + for f in self._hash_functions: + n = f(val) % BIT_COUNT + # If one of the bits is unset, the value is definitely not present + if not (self._bit & (1 << n)): + return False + # All bits were matched, `val` is likely to be part of the set + return True +``` From 9ff51fe82eeed18a9c130ff2b9a9264d8898a33b Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sat, 20 Jul 2024 19:21:47 +0100 Subject: [PATCH 067/107] posts: treap: add presentation --- content/posts/2024-07-20-treap/index.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/content/posts/2024-07-20-treap/index.md b/content/posts/2024-07-20-treap/index.md index edcda59..8d5973a 100644 --- a/content/posts/2024-07-20-treap/index.md +++ b/content/posts/2024-07-20-treap/index.md @@ -27,3 +27,13 @@ parent's priority is always higher than any of its children. [wiki]: https://en.wikipedia.org/wiki/Treap + +## What does it do? 
+ +By randomizing the priority value of each key at insertion time, we ensure a +high likelihood that the tree stays _roughly_ balanced, avoiding degenerating to +unbalanced O(N) height. + +Here's a sample tree created by inserting integers from 0 to 250 into the tree: + +{{< graphviz file="treap.gv" />}} From 21fbc24e0240471301460c066a0ffb7dbe976ed7 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sat, 27 Jul 2024 18:31:24 +0100 Subject: [PATCH 068/107] posts: add 'treap-revisited' --- .../posts/2024-07-27-treap-revisited/index.md | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 content/posts/2024-07-27-treap-revisited/index.md diff --git a/content/posts/2024-07-27-treap-revisited/index.md b/content/posts/2024-07-27-treap-revisited/index.md new file mode 100644 index 0000000..c9c01bc --- /dev/null +++ b/content/posts/2024-07-27-treap-revisited/index.md @@ -0,0 +1,29 @@ +--- +title: "Treap, revisited" +date: 2024-07-27T14:12:27+01:00 +draft: false # I don't care for draft mode, git has branches for that +description: "An even simpler BST" +tags: + - algorithms + - data structures + - python +categories: + - programming +series: + - Cool algorithms +favorite: false +disable_feed: false +--- + +My [last post]({{< relref "../2024-07-20-treap/index.md" >}}) about the _Treap_ +showed an implementation using tree rotations, as is commonly done with [AVL +Trees][avl] and [Red Black Trees][rb]. + +But the _Treap_ lends itself well to a simple and elegant implementation with no +tree rotations. This makes it especially easy to implement the removal of a key, +rather than the fiddly process of deletion using tree rotations. 
+ +[avl]: https://en.wikipedia.org/wiki/AVL_tree +[rb]: https://en.wikipedia.org/wiki/Red%E2%80%93black_tree + + From e842737cb6c4819b6b2585b87104afe164fb901d Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sat, 20 Jul 2024 19:22:38 +0100 Subject: [PATCH 069/107] posts: treap: add construction --- content/posts/2024-07-20-treap/index.md | 42 +++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/content/posts/2024-07-20-treap/index.md b/content/posts/2024-07-20-treap/index.md index 8d5973a..a5926f3 100644 --- a/content/posts/2024-07-20-treap/index.md +++ b/content/posts/2024-07-20-treap/index.md @@ -37,3 +37,45 @@ unbalanced O(N) height. Here's a sample tree created by inserting integers from 0 to 250 into the tree: {{< graphviz file="treap.gv" />}} + +## Implementation + +I'll be keeping the theme for this [series] by using Python to implement the +_Treap_. This leads to somewhat annoying code to handle the rotation process, +which is easier to do in C using pointers. + +[series]: {{< ref "/series/cool-algorithms/" >}} + +### Representation + +Creating a new `Treap` is easy: the tree starts off empty, waiting for new nodes +to insert. + +Each `Node` must keep track of the `key`, the mapped `value`, and the node's +`priority` (which is assigned randomly). Finally it must also allow for storing +two children (`left` and `right`). 
+ +```python +class Node[K, V]: + key: K + value: V + priority: float + left: Node[K, V] | None + right: Node[K, V] | None + + def __init__(self, key: K, value: V): + # Store key and value, like a normal BST node + self.key = key + self.value = value + # Priority is derived randomly + self.priority = random() + self.left = None + self.right = None + +class Treap[K, V]: + _root: Node[K, V] | None + + def __init__(self): + # The tree starts out empty + self._root = None +``` From a6bbb100981da2d742d428d85f3c8dfe96fc6231 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sat, 20 Jul 2024 16:20:33 +0100 Subject: [PATCH 070/107] layouts: tikz: allow using file input Makes it easier to handle big diagrams. --- layouts/shortcodes/tikz.html | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/layouts/shortcodes/tikz.html b/layouts/shortcodes/tikz.html index a3a78f4..c298b09 100644 --- a/layouts/shortcodes/tikz.html +++ b/layouts/shortcodes/tikz.html @@ -1,3 +1,16 @@ From 19b535ce49d70e1099a39e764d38ca0f79a0f5dd Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sat, 27 Jul 2024 18:31:47 +0100 Subject: [PATCH 071/107] posts: treap-revisited: add implementation --- content/posts/2024-07-27-treap-revisited/index.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/content/posts/2024-07-27-treap-revisited/index.md b/content/posts/2024-07-27-treap-revisited/index.md index c9c01bc..f188568 100644 --- a/content/posts/2024-07-27-treap-revisited/index.md +++ b/content/posts/2024-07-27-treap-revisited/index.md @@ -27,3 +27,12 @@ rather than the fiddly process of deletion using tree rotations. [rb]: https://en.wikipedia.org/wiki/Red%E2%80%93black_tree + +## Implementation + +All operations on the tree will be implemented in terms of two fundamental +operations: `split` and `merge`. + +We'll be reusing the same structures as in the last post, so let's skip straight +to implementing those fundaments, and building on them for `insert` and +`delete`. 
From 2eaa9c43293d7830ecd30b9aa25935afa7553acf Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sat, 20 Jul 2024 19:23:00 +0100 Subject: [PATCH 072/107] posts: treap: add search --- content/posts/2024-07-20-treap/index.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/content/posts/2024-07-20-treap/index.md b/content/posts/2024-07-20-treap/index.md index a5926f3..db29bc7 100644 --- a/content/posts/2024-07-20-treap/index.md +++ b/content/posts/2024-07-20-treap/index.md @@ -79,3 +79,21 @@ class Treap[K, V]: # The tree starts out empty self._root = None ``` + +### Search + +Searching the tree is the same as in any other _Binary Search Tree_. + +```python +def get(self, key: K) -> V | None: + node = self._root + # The usual BST traversal + while node is not None: + if node.key == key: + return node.value + elif node.key < key: + node = node.right + else: + node = node.left + return None +``` From 87ef9dd38c0f9fbec0b019097e38fb2b3bd9e9ec Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sat, 20 Jul 2024 17:21:03 +0100 Subject: [PATCH 073/107] layouts: add Graphviz support Similar to TikZ support.
--- content/posts/2020-07-14-hello-world/index.md | 11 +++++++++++ layouts/partials/head-extra.html | 17 +++++++++++++++++ layouts/shortcodes/graphviz.html | 16 ++++++++++++++++ 3 files changed, 44 insertions(+) create mode 100644 layouts/shortcodes/graphviz.html diff --git a/content/posts/2020-07-14-hello-world/index.md b/content/posts/2020-07-14-hello-world/index.md index 805eb07..9eb06e8 100644 --- a/content/posts/2020-07-14-hello-world/index.md +++ b/content/posts/2020-07-14-hello-world/index.md @@ -8,6 +8,7 @@ tags: categories: favorite: false tikz: true +graphviz: true --- ## Test post please ignore @@ -40,6 +41,16 @@ echo hello world | cut -d' ' -f 1 \end{tikzpicture} {{% /tikz %}} +### Graphviz support + +{{% graphviz %}} + graph { + a -- b + b -- c + c -- a + } +{{% /graphviz %}} + ### Spoilers {{% spoiler "Don't open me" %}} diff --git a/layouts/partials/head-extra.html b/layouts/partials/head-extra.html index 1e65c9f..f0a049d 100644 --- a/layouts/partials/head-extra.html +++ b/layouts/partials/head-extra.html @@ -3,6 +3,23 @@ {{ end }} + +{{ if (.Params.graphviz) }} + + +{{ end }} {{ with .OutputFormats.Get "atom" -}} {{ printf `` .Rel .MediaType.Type .Permalink $.Site.Title | safeHTML }} {{ end -}} diff --git a/layouts/shortcodes/graphviz.html b/layouts/shortcodes/graphviz.html new file mode 100644 index 0000000..45516a3 --- /dev/null +++ b/layouts/shortcodes/graphviz.html @@ -0,0 +1,16 @@ +

+    {{ with .Get "file" }}
+        {{ if eq (. | printf "%.1s") "/" }}
+            {{/* Absolute paths are from the root of the site. */}}
+            {{ $.Scratch.Set "filepath" . }}
+        {{ else }}
+            {{/* Relative paths are from page directory. */}}
+            {{ $.Scratch.Set "filepath" $.Page.File.Dir }}
+            {{ $.Scratch.Add "filepath" . }}
+        {{ end }}
+
+        {{ $.Scratch.Get "filepath" | readFile }}
+    {{ else }}
+        {{.Inner}}
+    {{ end }}
+
From 62cd0759cf317cc27236313feddeb3961abf2ed7 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Fri, 2 Aug 2024 21:03:47 +0100 Subject: [PATCH 074/107] config: enable MathJax --- config.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/config.yaml b/config.yaml index c0927fc..b0aadf8 100644 --- a/config.yaml +++ b/config.yaml @@ -67,6 +67,7 @@ params: webmentions: login: belanyi.fr pingback: true + mathjax: true taxonomies: category: "categories" From d33247b786631a0c63bf420d19170342a53737e6 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sat, 27 Jul 2024 18:32:09 +0100 Subject: [PATCH 075/107] posts: treap-revisited: add split --- .../posts/2024-07-27-treap-revisited/index.md | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/content/posts/2024-07-27-treap-revisited/index.md b/content/posts/2024-07-27-treap-revisited/index.md index f188568..097dfe5 100644 --- a/content/posts/2024-07-27-treap-revisited/index.md +++ b/content/posts/2024-07-27-treap-revisited/index.md @@ -36,3 +36,40 @@ operations: `split` and `merge`. We'll be reusing the same structures as in the last post, so let's skip straight to implementing those fundaments, and building on them for `insert` and `delete`. + +### Split + +Splitting a tree means taking a key, and getting the following output: + +* a `left` node, root of the tree of all keys lower than the input. +* an extracted `node` which corresponds to the input `key`. +* a `right` node, root of the tree of all keys higher than the input. 
+ +```python +type OptionalNode[K, V] = Node[K, V] | None + +class SplitResult(NamedTuple): + left: OptionalNode + node: OptionalNode + right: OptionalNode + +def split(root: OptionalNode[K, V], key: K) -> SplitResult: + # Base case, empty tree + if root is None: + return SplitResult(None, None, None) + # If we found the key, simply extract left and right + if root.key == key: + left, right = root.left, root.right + root.left, root.right = None, None + return SplitResult(left, root, right) + # Otherwise, recurse on the corresponding side of the tree + if root.key < key: + left, node, right = split(root.right, key) + root.right = left + return SplitResult(root, node, right) + if key < root.key: + left, node, right = split(root.left, key) + root.left = right + return SplitResult(left, node, root) + raise RuntimeError("Unreachable") +``` From dea81f18598ca7f0f3f74250430b8bec68cf2238 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sat, 20 Jul 2024 19:23:13 +0100 Subject: [PATCH 076/107] posts: treap: add insertion --- content/posts/2024-07-20-treap/index.md | 60 +++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/content/posts/2024-07-20-treap/index.md b/content/posts/2024-07-20-treap/index.md index db29bc7..431e68b 100644 --- a/content/posts/2024-07-20-treap/index.md +++ b/content/posts/2024-07-20-treap/index.md @@ -97,3 +97,63 @@ def get(self, key: K) -> T | None: node = node.left return None ``` + +### Insertion + +To insert a new `key` into the tree, we identify which leaf position it should +be inserted at. We then generate the node's priority, insert it at this +position, and rotate the node upwards until the heap property is respected. 
+ +```python +type ChildField = Literal["left", "right"] + +def insert(self, key: K, value: V) -> bool: + # Empty treap base-case + if self._root is None: + self._root = Node(key, value) + # Signal that we're not overwriting the value + return False + # Keep track of the parent chain for rotation after insertion + parents = [] + node = self._root + while node is not None: + # Insert a pre-existing key + if node.key == key: + node.value = value + return True + # Go down the tree, keep track of the path through the tree + field = "left" if key < node.key else "right" + parents.append((node, field)) + node = getattr(node, field) + # Key wasn't found, we're inserting a new node + child = Node(key, value) + parent, field = parents[-1] + setattr(parent, field, child) + # Rotate the new node up until we respect the decreasing priority property + self._rotate_up(child, parents) + # Key wasn't found, signal that we inserted a new node + return False + +def _rotate_up( + self, + node: Node[K, V], + parents: list[tuple[Node[K, V], ChildField]], +) -> None: + while parents: + parent, field = parents.pop() + # If the parent has higher priority, we're done rotating + if parent.priority >= node.priority: + break + # Check for grand-parent/root of tree edge-case + if parents: + # Update grand-parent to point to the new rotated node + grand_parent, grand_field = parents[-1] + setattr(grand_parent, grand_field, node) + else: + # Point the root to the new rotated node + self._root = node + other_field = "left" if field == "right" else "right" + # Rotate the node up + setattr(parent, field, getattr(node, other_field)) + setattr(node, other_field, parent) +``` From 5a233e738466464493b4ba516a4e05d39edd0c2f Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sat, 20 Jul 2024 17:24:21 +0100 Subject: [PATCH 077/107] layouts: add Mermaid support Similar to Graphviz and TikZ support.
--- content/posts/2020-07-14-hello-world/index.md | 14 ++++++++++++++ layouts/partials/head-extra.html | 7 +++++++ layouts/shortcodes/mermaid.html | 16 ++++++++++++++++ 3 files changed, 37 insertions(+) create mode 100644 layouts/shortcodes/mermaid.html diff --git a/content/posts/2020-07-14-hello-world/index.md b/content/posts/2020-07-14-hello-world/index.md index 9eb06e8..d430add 100644 --- a/content/posts/2020-07-14-hello-world/index.md +++ b/content/posts/2020-07-14-hello-world/index.md @@ -9,6 +9,7 @@ categories: favorite: false tikz: true graphviz: true +mermaid: true --- ## Test post please ignore @@ -51,6 +52,19 @@ echo hello world | cut -d' ' -f 1 } {{% /graphviz %}} +### Mermaid support + +{{% mermaid %}} + graph TD + A[Enter Chart Definition] --> B(Preview) + B --> C{decide} + C --> D[Keep] + C --> E[Edit Definition] + E --> B + D --> F[Save Image and Code] + F --> B +{{% /mermaid %}} + ### Spoilers {{% spoiler "Don't open me" %}} diff --git a/layouts/partials/head-extra.html b/layouts/partials/head-extra.html index f0a049d..dc97efa 100644 --- a/layouts/partials/head-extra.html +++ b/layouts/partials/head-extra.html @@ -20,6 +20,13 @@ })(); {{ end }} + +{{ if (.Params.mermaid) }} + +{{ end }} {{ with .OutputFormats.Get "atom" -}} {{ printf `` .Rel .MediaType.Type .Permalink $.Site.Title | safeHTML }} {{ end -}} diff --git a/layouts/shortcodes/mermaid.html b/layouts/shortcodes/mermaid.html new file mode 100644 index 0000000..80cf0a5 --- /dev/null +++ b/layouts/shortcodes/mermaid.html @@ -0,0 +1,16 @@ +
+    {{ with .Get "file" }}
+        {{ if eq (. | printf "%.1s") "/" }}
+            {{/* Absolute paths are from the root of the site. */}}
+            {{ $.Scratch.Set "filepath" . }}
+        {{ else }}
+            {{/* Relative paths are from page directory. */}}
+            {{ $.Scratch.Set "filepath" $.Page.File.Dir }}
+            {{ $.Scratch.Add "filepath" . }}
+        {{ end }}
+
+        {{ $.Scratch.Get "filepath" | readFile }}
+    {{ else }}
+        {{.Inner}}
+    {{ end }}
+
From cd24e9692a06c5b590dfa027f042fa6eeffb1e3c Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Fri, 2 Aug 2024 21:10:39 +0100 Subject: [PATCH 078/107] markdownlint: relax duplicate header check --- .markdownlint.yaml | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .markdownlint.yaml diff --git a/.markdownlint.yaml b/.markdownlint.yaml new file mode 100644 index 0000000..419c334 --- /dev/null +++ b/.markdownlint.yaml @@ -0,0 +1,3 @@ +# MD024/no-duplicate-heading/no-duplicate-header +MD024: + siblings_only: true From 0798812f86a17f397e12c76dc5ac131ade2d35a0 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sat, 27 Jul 2024 18:32:28 +0100 Subject: [PATCH 079/107] posts: treap: add merge --- .../posts/2024-07-27-treap-revisited/index.md | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/content/posts/2024-07-27-treap-revisited/index.md b/content/posts/2024-07-27-treap-revisited/index.md index 097dfe5..902ab57 100644 --- a/content/posts/2024-07-27-treap-revisited/index.md +++ b/content/posts/2024-07-27-treap-revisited/index.md @@ -73,3 +73,33 @@ def split(root: OptionalNode[K, V], key: K) -> SplitResult: return SplitResult(left, node, root) raise RuntimeError("Unreachable") ``` + +### Merge + +Merging a `left` and `right` tree means (cheaply) building a new tree containing +both of them. A pre-condition for merging is that the `left` tree is composed +entirely of nodes that are lower than any key in `right` (i.e: as in `left` and +`right` after a `split`). 
+ +```python +def merge( + left: OptionalNode[K, V], + right: OptionalNode[K, V], +) -> OptionalNode[K, V]: + # Base cases, left or right being empty + if left is None: + return right + if right is None: + return left + # Left has higher priority, it must become the root node + if left.priority >= right.priority: + # We recursively reconstruct its right sub-tree + left.right = merge(left.right, right) + return left + # Right has higher priority, it must become the root node + if left.priority < right.priority: + # We recursively reconstruct its left sub-tree + right.left = merge(left, right.left) + return right + raise RuntimeError("Unreachable") +``` From 3605445bcf7f1ea5dc3e8c810bd5a387aced2078 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Fri, 2 Aug 2024 21:04:30 +0100 Subject: [PATCH 080/107] posts: add 'reservoir-sampling' --- .../2024-08-02-reservoir-sampling/index.md | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 content/posts/2024-08-02-reservoir-sampling/index.md diff --git a/content/posts/2024-08-02-reservoir-sampling/index.md b/content/posts/2024-08-02-reservoir-sampling/index.md new file mode 100644 index 0000000..386a840 --- /dev/null +++ b/content/posts/2024-08-02-reservoir-sampling/index.md @@ -0,0 +1,27 @@ +--- +title: "Reservoir Sampling" +date: 2024-08-02T18:30:56+01:00 +draft: false # I don't care for draft mode, git has branches for that +description: "Elegantly sampling a stream" +tags: + - algorithms + - python +categories: + - programming +series: + - Cool algorithms +favorite: false +disable_feed: false +mathjax: true +--- + +[_Reservoir Sampling_][reservoir] is an [online][online], probabilistic +algorithm to uniformly sample $k$ random elements out of a stream of values. + +It's a particularly elegant and small algorithm, only requiring $\Theta(k)$ +amount of space and a single pass through the stream. 
+ +[reservoir]: https://en.wikipedia.org/wiki/Reservoir_sampling +[online]: https://en.wikipedia.org/wiki/Online_algorithm + + From 652fe81c418adbd6330769ef1459a012f092b8b5 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sat, 27 Jul 2024 18:32:47 +0100 Subject: [PATCH 081/107] posts: treap-revisited: add insertion --- .../posts/2024-07-27-treap-revisited/index.md | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/content/posts/2024-07-27-treap-revisited/index.md b/content/posts/2024-07-27-treap-revisited/index.md index 902ab57..068811f 100644 --- a/content/posts/2024-07-27-treap-revisited/index.md +++ b/content/posts/2024-07-27-treap-revisited/index.md @@ -103,3 +103,28 @@ def merge( return right raise RuntimeError("Unreachable") ``` + +### Insertion + +Inserting a node into the tree is done in two steps: + +1. `split` the tree to isolate the middle insertion point +2. `merge` it back up to form a full tree with the inserted key + +```python +def insert(self, key: K, value: V) -> bool: + # `left` and `right` come before/after the key + left, node, right = split(self._root, key) + was_updated: bool + # Create the node, or update its value, if the key was already in the tree + if node is None: + node = Node(key, value) + was_updated = False + else: + node.value = value + was_updated = True + # Rebuild the tree with a couple of merge operations + self._root = merge(left, merge(node, right)) + # Signal whether the key was already in the key + return was_updated +``` From eff815230789255837e06ae48c8baf21806b55b1 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sat, 20 Jul 2024 19:26:08 +0100 Subject: [PATCH 082/107] posts: union-find: fix typo --- content/posts/2024-06-24-union-find/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/posts/2024-06-24-union-find/index.md b/content/posts/2024-06-24-union-find/index.md index 7c9435c..dfe4c68 100644 --- a/content/posts/2024-06-24-union-find/index.md +++ 
b/content/posts/2024-06-24-union-find/index.md @@ -15,7 +15,7 @@ favorite: false disable_feed: false --- -To kickoff the [series]({{< ref "/series/cool-algorithms/">}}) of posts about +To kickoff the [series]({{< ref "/series/cool-algorithms/" >}}) of posts about algorithms and data structures I find interesting, I will be talking about my favorite one: the [_Disjoint Set_][wiki]. Also known as the _Union-Find_ data structure, so named because of its two main operations: `ds.union(lhs, rhs)` and From 9ff4a07c9b3b644e4485f827cd9b54b672a08c10 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Fri, 2 Aug 2024 21:04:49 +0100 Subject: [PATCH 083/107] posts: reservoir-sampling: add one-element sample --- .../2024-08-02-reservoir-sampling/index.md | 56 +++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/content/posts/2024-08-02-reservoir-sampling/index.md b/content/posts/2024-08-02-reservoir-sampling/index.md index 386a840..eabeab6 100644 --- a/content/posts/2024-08-02-reservoir-sampling/index.md +++ b/content/posts/2024-08-02-reservoir-sampling/index.md @@ -25,3 +25,59 @@ amount of space and a single pass through the stream. [online]: https://en.wikipedia.org/wiki/Online_algorithm + +## Sampling one element + +As an introduction, we'll first focus on fairly sampling one element from the +stream. + +```python +def sample_one[T](stream: Iterable[T]) -> T: + stream_iter = iter(stream) + # Sample the first element + res = next(stream_iter) + for i, val in enumerate(stream_iter, start=1): + j = random.randint(0, i) + # Replace the sampled element with probability 1/(i + 1) + if j == 0: + res = val + # Return the randomly sampled element + return res +``` + +### Proof + +Let's now prove that this algorithm leads to a fair sampling of the stream. + +We'll be doing proof by induction. + +#### Hypothesis $H_N$ + +After iterating through the first $N$ items in the stream, +each of them has had an equal $\frac{1}{N}$ probability of being selected as +`res`. 
+ +#### Base Case $H_1$ + +We can trivially observe that the first element is always assigned to `res`, +$\frac{1}{1} = 1$, the hypothesis has been verified. + +#### Inductive Case + +For a given $N$, let us assume that $H_N$ holds. Let us now look at the events +of loop iteration where `i = N` (i.e: observation of the $N + 1$-th item in the +stream). + +`j = random.randint(0, i)` uniformly selects a value in the range $[0, i]$, +a.k.a $[0, N]$. We then have two cases: + +* `j == 0`, with probability $\frac{1}{N + 1}$: we select `val` as the new +reservoir element `res`. + +* `j != 0`, with probability $\frac{N}{N + 1}$: we keep the previous value of +`res`. By $H_N$, any of the first $N$ elements had a $\frac{1}{N}$ probability +of being `res` before at the start of the loop, each element now has a +probability $\frac{1}{N} \cdot \frac{N}{N + 1} = \frac{1}{N + 1}$ of being the +element. + +And thus, we have proven $H_{N + 1}$ at the end of the loop. From 883f0e7e9b08d11303f2a8a678a5929bd4c38be5 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sat, 27 Jul 2024 18:33:05 +0100 Subject: [PATCH 084/107] posts: treap: add removal --- .../posts/2024-07-27-treap-revisited/index.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/content/posts/2024-07-27-treap-revisited/index.md b/content/posts/2024-07-27-treap-revisited/index.md index 068811f..99ae68f 100644 --- a/content/posts/2024-07-27-treap-revisited/index.md +++ b/content/posts/2024-07-27-treap-revisited/index.md @@ -128,3 +128,19 @@ def insert(self, key: K, value: V) -> bool: # Signal whether the key was already in the key return was_updated ``` + +### Removal + +Removing a key from the tree is similar to inserting a new key, and forgetting +to insert it back: simply `split` the tree and `merge` it back without the +extracted middle node. 
+ +```python +def remove(self, key: K) -> bool: + # `node` contains the key, or `None` if the key wasn't in the tree + left, node, right = split(self._root, key) + # Put the tree back together, without the extract node + self._root = merge(left, right) + # Signal whether `key` was mapped in the tree + return node is not None +``` From 806772d883d8dd2eb5e9e59583383be5fa9ba9b9 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sat, 20 Jul 2024 19:26:24 +0100 Subject: [PATCH 085/107] posts: gap-buffer: fix typo --- content/posts/2024-07-06-gap-buffer/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/posts/2024-07-06-gap-buffer/index.md b/content/posts/2024-07-06-gap-buffer/index.md index a9aac96..b5b0b3a 100644 --- a/content/posts/2024-07-06-gap-buffer/index.md +++ b/content/posts/2024-07-06-gap-buffer/index.md @@ -37,7 +37,7 @@ shorter/longer as required. ## Implementation I'll be writing a sample implementation in Python, as with the rest of the -[series]({{< ref "/series/cool-algorithms/">}}). I don't think it showcases the +[series]({{< ref "/series/cool-algorithms/" >}}). I don't think it showcases the elegance of the _Gap Buffer_ in action like a C implementation full of `memmove`s would, but it does makes it short and sweet. From 7bc3d5c18fa868ce779123a942ca4865812dacf1 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Fri, 2 Aug 2024 21:10:39 +0100 Subject: [PATCH 086/107] posts: reservoir-sampling: add k-element sampling --- .../2024-08-02-reservoir-sampling/index.md | 62 +++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/content/posts/2024-08-02-reservoir-sampling/index.md b/content/posts/2024-08-02-reservoir-sampling/index.md index eabeab6..270c02c 100644 --- a/content/posts/2024-08-02-reservoir-sampling/index.md +++ b/content/posts/2024-08-02-reservoir-sampling/index.md @@ -81,3 +81,65 @@ probability $\frac{1}{N} \cdot \frac{N}{N + 1} = \frac{1}{N + 1}$ of being the element. 
And thus, we have proven $H_{N + 1}$ at the end of the loop. + +## Sampling $k$ elements + +The code for sampling $k$ elements is very similar to the one-element case. + +```python +def sample[T](stream: Iterable[T], k: int = 1) -> list[T]: + stream_iter = iter(stream) + # Retain the first 'k' elements in the reservoir + res = list(itertools.islice(stream_iter, k)) + for i, val in enumerate(stream_iter, start=k): + j = random.randint(0, i) + # Replace one element at random with probability k/(i + 1) + if j < k: + res[j] = val + # Return 'k' randomly sampled elements + return res +``` + +### Proof + +Let us once again do a proof by induction, assuming the stream contains at least +$k$ items. + +#### Hypothesis $H_N$ + +After iterating through the first $N$ items in the stream, each of them has had +an equal $\frac{k}{N}$ probability of being sampled from the stream. + +#### Base Case $H_k$ + +We can trivially observe that the first $k$ elements are sampled at the start of +the algorithm, $\frac{k}{k} = 1$, the hypothesis has been verified. + +#### Inductive Case + +For a given $N$, let us assume that $H_N$ holds. Let us now look at the events +of the loop iteration where `i = N`, in order to prove $H_{N + 1}$. + +`j = random.randint(0, i)` uniformly selects a value in the range $[0, i]$, +a.k.a $[0, N]$. We then have three cases: + +* `j >= k`, with probability $1 - \frac{k}{N + 1}$: we do not modify the +sampled reservoir at all. + +* `j < k`, with probability $\frac{k}{N + 1}$: we sample the new element to +replace the `j`-th element of the reservoir. Therefore for any element +$e \in [0, k[$ we can either have: + * $j = e$: the element _is_ replaced, probability $\frac{1}{k}$. + * $j \neq e$: the element is _not_ replaced, probability $\frac{k - 1}{k}$. + +We can now compute the probability that a previously sampled element is kept in +the reservoir: +$1 - \frac{k}{N + 1} + \frac{k}{N + 1} \cdot \frac{k - 1}{k} = \frac{N}{N + 1}$.
+ +By $H_N$, any of the first $N$ elements had a $\frac{k}{N}$ probability +of being sampled before at the start of the loop, each element now has a +probability $\frac{k}{N} \cdot \frac{N}{N + 1} = \frac{k}{N + 1}$ of being the +element. + +We have now proven that all elements have a probability $\frac{k}{N + 1}$ of +being sampled at the end of the loop, therefore $H_{N + 1}$ has been verified. From 9208b4b87427285c2baed4a8c7ad571209715524 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Fri, 2 Aug 2024 21:16:33 +0100 Subject: [PATCH 087/107] nix: bump inputs --- flake.lock | 33 +++++++++++++++------------------ flake.nix | 1 - 2 files changed, 15 insertions(+), 19 deletions(-) diff --git a/flake.lock b/flake.lock index eb36380..06446f6 100644 --- a/flake.lock +++ b/flake.lock @@ -3,11 +3,11 @@ "flake-compat": { "flake": false, "locked": { - "lastModified": 1673956053, - "narHash": "sha256-4gtG9iQuiKITOjNQQeQIpoIB6b16fm+504Ch3sNKLd8=", + "lastModified": 1696426674, + "narHash": "sha256-kvjfFW7WAETZlt09AgDn1MrtKzP7t90Vf7vypd3OL1U=", "owner": "edolstra", "repo": "flake-compat", - "rev": "35bb57c0c8d8b62bbfd284272c928ceb64ddbde9", + "rev": "0f9255e01c2351cc7d116c072cb317785dd33b33", "type": "github" }, "original": { @@ -21,11 +21,11 @@ "systems": "systems" }, "locked": { - "lastModified": 1689068808, - "narHash": "sha256-6ixXo3wt24N/melDWjq70UuHQLxGV8jZvooRanIHXw0=", + "lastModified": 1710146030, + "narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=", "owner": "numtide", "repo": "flake-utils", - "rev": "919d646de7be200f3bf08cb76ae1f09402b6f9b4", + "rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a", "type": "github" }, "original": { @@ -43,11 +43,11 @@ ] }, "locked": { - "lastModified": 1660459072, - "narHash": "sha256-8DFJjXG8zqoONA1vXtgeKXy68KdJL5UaXR8NtVMUbx8=", + "lastModified": 1709087332, + "narHash": "sha256-HG2cCnktfHsKV0s4XW83gU3F57gaTljL9KNSuG6bnQs=", "owner": "hercules-ci", "repo": "gitignore.nix", - "rev": 
"a20de23b925fd8264fd7fad6454652e142fd7f73", + "rev": "637db329424fd7e46cf4185293b9cc8c88c95394", "type": "github" }, "original": { @@ -58,11 +58,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1691155369, - "narHash": "sha256-CIuJO5pgwCMsZM8flIU2OiZ79QfDCesXPsAiokCzlNM=", + "lastModified": 1722415718, + "narHash": "sha256-5US0/pgxbMksF92k1+eOa8arJTJiPvsdZj9Dl+vJkM4=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "7d050b98e51cdbdd88ad960152d398d41c7ff5b4", + "rev": "c3392ad349a5227f4a3464dce87bcc5046692fce", "type": "github" }, "original": { @@ -75,9 +75,6 @@ "pre-commit-hooks": { "inputs": { "flake-compat": "flake-compat", - "flake-utils": [ - "futils" - ], "gitignore": "gitignore", "nixpkgs": [ "nixpkgs" @@ -87,11 +84,11 @@ ] }, "locked": { - "lastModified": 1691093055, - "narHash": "sha256-sjNWYpDHc6vx+/M0WbBZKltR0Avh2S43UiDbmYtfHt0=", + "lastModified": 1721042469, + "narHash": "sha256-6FPUl7HVtvRHCCBQne7Ylp4p+dpP3P/OYuzjztZ4s70=", "owner": "cachix", "repo": "pre-commit-hooks.nix", - "rev": "ebb43bdacd1af8954d04869c77bc3b61fde515e4", + "rev": "f451c19376071a90d8c58ab1a953c6e9840527fd", "type": "github" }, "original": { diff --git a/flake.nix b/flake.nix index c70daf0..f3b8652 100644 --- a/flake.nix +++ b/flake.nix @@ -22,7 +22,6 @@ repo = "pre-commit-hooks.nix"; ref = "master"; inputs = { - flake-utils.follows = "futils"; nixpkgs.follows = "nixpkgs"; nixpkgs-stable.follows = "nixpkgs"; }; From 0f8e401067e5f92a5d8d457c62ae50c63a955cb6 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sat, 10 Aug 2024 16:43:58 +0100 Subject: [PATCH 088/107] posts: add 'kd-tree' --- content/posts/2024-08-10-kd-tree/index.md | 27 +++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 content/posts/2024-08-10-kd-tree/index.md diff --git a/content/posts/2024-08-10-kd-tree/index.md b/content/posts/2024-08-10-kd-tree/index.md new file mode 100644 index 0000000..5780291 --- /dev/null +++ b/content/posts/2024-08-10-kd-tree/index.md @@ -0,0 +1,27 @@ +--- +title: 
"k-d Tree" +date: 2024-08-10T11:50:33+01:00 +draft: false # I don't care for draft mode, git has branches for that +description: "Points in spaaaaace!" +tags: + - algorithms + - data structures + - python +categories: + - programming +series: + - Cool algorithms +favorite: false +disable_feed: false +--- + +The [_k-d Tree_][wiki] is a useful way to map points in space and make them +efficient to query. + +I ran into them during my studies in graphics, as they are one of the +possible acceleration structures for [ray-casting] operations. + +[wiki]: https://en.wikipedia.org/wiki/K-d_tree +[ray-casting]: https://en.wikipedia.org/wiki/Ray_casting + + From e3a3930ff332bfd1ae3cd1150af149fde92f7476 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sat, 10 Aug 2024 16:44:57 +0100 Subject: [PATCH 089/107] posts: kd-tree: add construction --- content/posts/2024-08-10-kd-tree/index.md | 99 +++++++++++++++++++++++ 1 file changed, 99 insertions(+) diff --git a/content/posts/2024-08-10-kd-tree/index.md b/content/posts/2024-08-10-kd-tree/index.md index 5780291..773c0ff 100644 --- a/content/posts/2024-08-10-kd-tree/index.md +++ b/content/posts/2024-08-10-kd-tree/index.md @@ -25,3 +25,102 @@ possible acceleration structures for [ray-casting] operations. [ray-casting]: https://en.wikipedia.org/wiki/Ray_casting + +## Implementation + +As usual, this will be in Python, though its lack of proper discriminated enums +makes it more verbose than would otherwise be necessary. + +### Pre-requisites + +Let's first define what kind of space our _k-d Tree_ is dealing with. In this +instance $k = 3$ just like in the normal world. 
+ +```python +class Point(NamedTuple): + x: float + y: float + z: float + +class Axis(IntEnum): + X = 0 + Y = 1 + Z = 2 + + def next(self) -> Axis: + # Each level of the tree is split along a different axis + return Axis((self + 1) % 3) +``` + +### Representation + +The tree is represented by `KdTree`, each of its leaf nodes is a `KdLeafNode` +and its inner nodes are `KdSplitNode`s. + +For each point in space, the tree can also keep track of an associated value, +similar to a dictionary or other mapping data structure. Hence we will make our +`KdTree` generic to this mapped type `T`. + +#### Leaf node + +A leaf node contains a number of points that were added to the tree. For each +point, we also track their mapped value, hence the `dict[Point, T]`. + +```python +class KdLeafNode[T]: + points: dict[Point, T] + + def __init__(self): + self.points = {} +``` + +#### Split node + +An inner node must partition the space into two sub-spaces along a given axis +and mid-point (thus defining a plane). All points that are "to the left" of the +plane will be kept in one child, while all the points "to the right" will be in +the other. Similar to a [_Binary Search Tree_][bst]'s inner nodes. + +[bst]: https://en.wikipedia.org/wiki/Binary_search_tree + +```python +class KdSplitNode[T]: + axis: Axis + mid: float + children: tuple[KdNode[T], KdNode[T]] + + # Convenience function to index into the child which contains `point` + def _index(self, point: Point) -> int: + return 0 if point[self.axis] <= self.mid else 1 +``` + +#### Tree + +The tree itself is merely a wrapper around its inner nodes. + +One annoying issue about writing this in Python is the lack of proper +discriminated enum types. So we need to create a wrapper type for the nodes +(`KdNode`) to allow for splitting when updating the tree.
+ +```python +class KdNode[T]: + # Wrapper around leaf/inner nodes, the poor man's discriminated enum + inner: KdLeafNode[T] | KdSplitNode[T] + + def __init__(self): + self.inner = KdLeafNode() + + # Convenience constructor used when splitting a node + @classmethod + def from_items(cls, items: Iterable[tuple[Point, T]]) -> KdNode[T]: + res = cls() + res.inner.points.update(items) + return res + +class KdTree[T]: + _root: KdNode[T] + + def __init__(self): + # Tree starts out empty + self._root = KdNode() +``` From a12642a9bd5afe4edffb76163b39bae03c8158d0 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sat, 10 Aug 2024 16:47:11 +0100 Subject: [PATCH 090/107] posts: kd-tree: add insertion --- content/posts/2024-08-10-kd-tree/index.md | 77 +++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/content/posts/2024-08-10-kd-tree/index.md b/content/posts/2024-08-10-kd-tree/index.md index 773c0ff..3941019 100644 --- a/content/posts/2024-08-10-kd-tree/index.md +++ b/content/posts/2024-08-10-kd-tree/index.md @@ -124,3 +124,80 @@ class KdTree[T]: # Tree starts out empty self._root = KdNode() ``` + +### Inserting a point + +To add a point to the tree, we simply recurse from node to node, similar to a +_BST_'s insertion algorithm. Once we've found the correct leaf node to insert +our point into, we simply do so. + +If that leaf node goes over the maximum number of points it can store, we must +then split it along an axis, cycling between `X`, `Y`, and `Z` at each level of +the tree (i.e: splitting along the `X` axis on the first level, then `Y` on the +second, then `Z` after that, and then `X`, etc...). 
+ +```python +# How many points should be stored in a leaf node before being split +MAX_CAPACITY = 32 + +def median(values: Iterable[float]) -> float: + sorted_values = sorted(values) + mid_point = len(sorted_values) // 2 + if len(sorted_values) % 2 == 1: + return sorted_values[mid_point] + a, b = sorted_values[mid_point - 1], sorted_values[mid_point] + return a + (b - a) / 2 + +def partition[T]( + pred: Callable[[T], bool], + iterable: Iterable[T] +) -> tuple[list[T], list[T]]: + truths, falses = [], [] + for v in iterable: + (truths if pred(v) else falses).append(v) + return truths, falses + +def split_leaf[T](node: KdLeafNode[T], axis: Axis) -> KdSplitNode[T]: + # Find the median value for the given axis + mid = median(p[axis] for p in node.points) + # Split into left/right children according to the mid-point and axis + left, right = partition(lambda kv: kv[0][axis] <= mid, node.points.items()) + return KdSplitNode( + axis, + mid, + (KdNode.from_items(left), KdNode.from_items(right)), + ) + +class KdTree[T]: + def insert(self, point: Point, val: T) -> bool: + # Forward to the root node, choose `X` as the first split axis + return self._root.insert(point, val, Axis.X) + +class KdLeafNode[T]: + def insert(self, point: Point, val: T, split_axis: Axis) -> bool: + # Check whether we're overwriting a previous value + was_mapped = point in self.points + # Store the corresponding value + self.points[point] = val + # Return whether we've performed an overwrite + return was_mapped + +class KdSplitNode[T]: + def insert(self, point: Point, val: T, split_axis: Axis) -> bool: + # Find the child which contains the point + child = self.children[self._index(point)] + # Recurse into it, choosing the next split axis + return child.insert(point, val, split_axis.next()) + +class KdNode[T]: + def insert(self, point: Point, val: T, split_axis: Axis) -> bool: + # Add the point to the wrapped node... + res = self.inner.insert(point, val, split_axis) + # ...
And take care of splitting leaf nodes when necessary + if ( + isinstance(self.inner, KdLeafNode) + and len(self.inner.points) > MAX_CAPACITY + ): + self.inner = split_leaf(self.inner, split_axis) + return res +``` From 74d4aa87e67f81e79d3744183fa2d8c1418c0fdb Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sat, 10 Aug 2024 16:47:59 +0100 Subject: [PATCH 091/107] posts: kd-tree: add search --- content/posts/2024-08-10-kd-tree/index.md | 28 +++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/content/posts/2024-08-10-kd-tree/index.md b/content/posts/2024-08-10-kd-tree/index.md index 3941019..407951a 100644 --- a/content/posts/2024-08-10-kd-tree/index.md +++ b/content/posts/2024-08-10-kd-tree/index.md @@ -201,3 +201,31 @@ class KdNode[T]: self.inner = split_leaf(self.inner, split_axis) return res ``` + +### Searching for a point + +Looking for a given point in the tree looks very similar to a _BST_'s search, +each inner node dividing the space into two sub-spaces, only one of which +contains the point.
+ +```python +class KdTree[T]: + def lookup(self, point: Point) -> T | None: + # Forward to the root node + return self._root.lookup(point) + +class KdNode[T]: + def lookup(self, point: Point) -> T | None: + # Forward to the wrapped node + return self.inner.lookup(point) + +class KdLeafNode[T]: + def lookup(self, point: Point) -> T | None: + # Simply check whether we've stored the point in this leaf + return self.points.get(point) + +class KdSplitNode[T]: + def lookup(self, point: Point) -> T | None: + # Recurse into the child which contains the point + return self.children[self._index(point)].lookup(point) +``` From 8acb675b1674e2293329f291531a9d595d825860 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sat, 10 Aug 2024 16:48:32 +0100 Subject: [PATCH 092/107] posts: kd-tree: add nearest neighbour --- content/posts/2024-08-10-kd-tree/index.md | 241 ++++++++++++++++++++++ 1 file changed, 241 insertions(+) diff --git a/content/posts/2024-08-10-kd-tree/index.md b/content/posts/2024-08-10-kd-tree/index.md index 407951a..3135647 100644 --- a/content/posts/2024-08-10-kd-tree/index.md +++ b/content/posts/2024-08-10-kd-tree/index.md @@ -229,3 +229,244 @@ class KdSplitNode[T]: # Recurse into the child which contains the point return self.children[self._index(point)].lookup(point) ``` + +### Closest points + +Now to look at the most interesting operation one can do on a _k-d Tree_: +querying for the objects which are closest to a given point (i.e: the [Nearest +neighbour search][nns]). + +This is a more complicated algorithm, which will also need some modifications to +the current _k-d Tree_ implementation in order to track just a bit more information +about the points it contains. + +[nns]: https://en.wikipedia.org/wiki/Nearest_neighbor_search + +#### A notion of distance + +To search for the closest points to a given origin, we first need to define +which [distance](https://en.wikipedia.org/wiki/Distance) we are using in our +space.
+ +For this example, we'll simply be using the usual definition of [(Euclidean) +distance][euclidean-distance]. + +[euclidean-distance]: https://en.wikipedia.org/wiki/Euclidean_distance + +```python +def dist(point: Point, other: Point) -> float: + return sqrt(sum((a - b) ** 2 for a, b in zip(point, other))) +``` + +#### Tracking the tree's boundaries + +To make the query efficient, we'll need to track the tree's boundaries: the +bounding box of all points contained therein. This will allow us to stop the +search early once we've found enough points and can be sure that the rest of the +tree is too far away to qualify. + +For this, let's define the `AABB` (Axis-Aligned Bounding Box) class. + +```python +class Point(NamedTuple): + # Convenience function to replace the coordinate along a given dimension + def replace(self, axis: Axis, new_coord: float) -> Point: + coords = list(self) + coords[axis] = new_coord + return Point(*coords) + +class AABB(NamedTuple): + # Lowest coordinates in the box + low: Point + # Highest coordinates in the box + high: Point + + # An empty box + @classmethod + def empty(cls) -> AABB: + return cls( + Point(*(float("inf"),) * 3), + Point(*(float("-inf"),) * 3), + ) + + # Split the box into two along a given axis for a given mid-point + def split(self, axis: Axis, mid: float) -> tuple[AABB, AABB]: + assert self.low[axis] <= mid <= self.high[axis] + return ( + AABB(self.low, self.high.replace(axis, mid)), + AABB(self.low.replace(axis, mid), self.high), + ) + + # Extend a box to contain a given point + def extend(self, point: Point) -> None: + low = NamedTuple(*(map(min, zip(self.low, point)))) + high = NamedTuple(*(map(max, zip(self.high, point)))) + return AABB(low, high) + + # Return the shortest distance between a given point and the box + def dist_to_point(self, point: Point) -> float: + deltas = ( + max(self.low[axis] - point[axis], 0, point[axis] - self.high[axis]) + for axis in Axis + ) + return dist(Point(0, 0, 0), Point(*deltas)) +``` + +And do the
necessary modifications to the `KdTree` to store the bounding box and +update it as we add new points. + +```python +class KdTree[T]: + _root: KdNode[T] + # New field: to keep track of the tree's boundaries + _aabb: AABB + + def __init__(self): + self._root = KdNode() + # Initialize the empty tree with an empty bounding box + self._aabb = AABB.empty() + + def insert(self, point: Point, val: T) -> bool: + # Extend the AABB for our k-d Tree when adding a point to it + self._aabb = self._aabb.extend(point) + return self._root.insert(point, val, Axis.X) +``` + +#### `MaxHeap` + +Python's builtin [`heapq`][heapq] module provides the necessary functions to +create and interact with a [_Priority Queue_][priority-queue], in the form of a +[_Binary Heap_][binary-heap]. + +Unfortunately, Python's library maintains a _min-heap_, which keeps the minimum +element at the root. For this algorithm, we're interested in having a +_max-heap_, with the maximum at the root. + +Thankfully, one can just reverse the comparison function for each element to +convert between the two. Let's write a `MaxHeap` class making use of this +library, with a `Reverse` wrapper class to reverse the order of elements +contained within it (similar to [Rust's `Reverse`][reverse]). 
+ +[binary-heap]: https://en.wikipedia.org/wiki/Binary_heap +[heapq]: https://docs.python.org/3/library/heapq.html +[priority-queue]: https://en.wikipedia.org/wiki/Priority_queue +[reverse]: https://doc.rust-lang.org/std/cmp/struct.Reverse.html + +```python +# Reverses the wrapped value's ordering +@functools.total_ordering +class Reverse[T]: + value: T + + def __init__(self, value: T): + self.value = value + + def __lt__(self, other: Reverse[T]) -> bool: + return self.value > other.value + + def __eq__(self, other: Reverse[T]) -> bool: + return self.value == other.value + +class MaxHeap[T]: + _heap: list[Reverse[T]] + + def __init__(self): + self._heap = [] + + def __len__(self) -> int: + return len(self._heap) + + def __iter__(self) -> Iterator[T]: + yield from (item.value for item in self._heap) + + # Push a value on the heap + def push(self, value: T) -> None: + heapq.heappush(self._heap, Reverse(value)) + + # Peek at the current maximum value + def peek(self) -> T: + return self._heap[0].value + + # Pop and return the highest value + def pop(self) -> T: + return heapq.heappop(self._heap).value + + # Pushes a value onto the heap, pops and returns the highest value + def pushpop(self, value: T) -> None: + return heapq.heappushpop(self._heap, Reverse(value)).value +``` + +#### The actual Implementation + +Now that we have written the necessary building blocks, let's tackle the +Implementation of `closest` for our _k-d Tree_. 
+ +```python +# Wrapper type for closest points, ordered by `distance` +@dataclasses.dataclass(order=True) +class ClosestPoint[T](NamedTuple): + point: Point = field(compare=False) + value: T = field(compare=False) + distance: float + +class KdTree[T]: + def closest(self, point: Point, n: int = 1) -> list[ClosestPoint[T]]: + assert n > 0 + # Create the output heap + res = MaxHeap() + # Recurse onto the root node + self._root.closest(point, res, n, self._aabb) + # Return the resulting list, from closest to farthest + return sorted(res) + +class KdNode[T]: + def closest( + self, + point: Point, + out: MaxHeap[ClosestPoint[T]], + n: int, + bounds: AABB, + ) -> None: + # Forward to the wrapped node + self.inner.closest(point, out, n, bounds) + +class KdLeafNode[T]: + def closest( + self, + point: Point, + out: MaxHeap[ClosestPoint[T]], + n: int, + bounds: AABB, + ) -> None: + # At the leaf, simply iterate over all points and add them to the heap + for p, val in self.points.items(): + item = ClosestPoint(p, val, dist(p, point)) + if len(out) < n: + # If the heap isn't full, just push + out.push(item) + elif out.peek().distance > item.distance: + # Otherwise, push and pop to keep the heap at `n` elements + out.pushpop(item) + +class KdSplitNode[T]: + def closest( + self, + point: Point, + out: list[ClosestPoint[T]], + n: int, + bounds: AABB, + ) -> None: + index = self._index(point) + children_bounds = bounds.split(self.axis, self.mid) + # Iterate over the child which contains the point, then its neighbour + for i in (index, 1 - index): + child, bounds = self.children[i], children_bounds[i] + # `min_dist` is 0 for the first child, and the minimum distance of + # all points contained in the second child + min_dist = bounds.dist_to_point(point) + # If the heap is at capacity and the child to inspect too far, stop + if len(out) == n and min_dist > out.peek().distance: + return + # Otherwise, recurse + child.closest(point, out, n, bounds) +``` From 
53e1fbaf3824ffec28d92d511bfb919c87467338 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Fri, 16 Aug 2024 08:41:23 +0100 Subject: [PATCH 093/107] posts: add kd-tree-revisited --- .../2024-08-17-kd-tree-revisited/index.md | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 content/posts/2024-08-17-kd-tree-revisited/index.md diff --git a/content/posts/2024-08-17-kd-tree-revisited/index.md b/content/posts/2024-08-17-kd-tree-revisited/index.md new file mode 100644 index 0000000..137e2fd --- /dev/null +++ b/content/posts/2024-08-17-kd-tree-revisited/index.md @@ -0,0 +1,22 @@ +--- +title: "Kd Tree Revisited" +date: 2024-08-17T14:20:22+01:00 +draft: false # I don't care for draft mode, git has branches for that +description: "Simplifying the nearest neighbour search" +tags: + - algorithms + - data structures + - python +categories: + - programming +series: + - Cool algorithms +favorite: false +disable_feed: false +--- + +After giving it a bit of thought, I've found a way to simplify the nearest +neighbour search (i.e: the `closest` method) for the `KdTree` I implemented in +[my previous post]({{< relref "../2024-08-10-kd-tree/index.md" >}}). + + From f35bad4c89977786664c87337460a014a6ec7b85 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Fri, 16 Aug 2024 08:41:49 +0100 Subject: [PATCH 094/107] posts: kd-tree-revisited: add presentation --- .../2024-08-17-kd-tree-revisited/index.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/content/posts/2024-08-17-kd-tree-revisited/index.md b/content/posts/2024-08-17-kd-tree-revisited/index.md index 137e2fd..5e5227f 100644 --- a/content/posts/2024-08-17-kd-tree-revisited/index.md +++ b/content/posts/2024-08-17-kd-tree-revisited/index.md @@ -20,3 +20,21 @@ neighbour search (i.e: the `closest` method) for the `KdTree` I implemented in [my previous post]({{< relref "../2024-08-10-kd-tree/index.md" >}}). 
+ +## The improvement + +That post implemented the nearest neighbour search by keeping track of the +tree's boundaries (through `AABB`), and each of its sub-trees (through +`AABB.split`), and testing for the early exit condition by computing the +distance of the search's origin to each sub-tree's boundaries. + +Instead of _explicitly_ keeping track of each sub-tree's boundaries, we can +implicitly compute it when recursing down the tree. + +To check for the distance between the queried point and the splitting plane of +inner nodes: we simply need to project the origin onto that plane, thus giving +us a minimal bound on the distance of the points stored on the other side. + +This can be easily computed from the `axis` and `mid` values which are stored in +the inner nodes: to project the origin onto the plane we simply replace its +coordinate for this axis by `mid`. From 2c1cd7df1cd16eb3bda12a1a0c352f1355783490 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Fri, 16 Aug 2024 08:42:10 +0100 Subject: [PATCH 095/107] posts: kd-tree-revisited: add implementation --- .../2024-08-17-kd-tree-revisited/index.md | 72 +++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/content/posts/2024-08-17-kd-tree-revisited/index.md b/content/posts/2024-08-17-kd-tree-revisited/index.md index 5e5227f..be15f48 100644 --- a/content/posts/2024-08-17-kd-tree-revisited/index.md +++ b/content/posts/2024-08-17-kd-tree-revisited/index.md @@ -38,3 +38,75 @@ us a minimal bound on the distance of the points stored on the other side. This can be easily computed from the `axis` and `mid` values which are stored in the inner nodes: to project the origin onto the plane we simply replace its coordinate for this axis by `mid`.
+ +## Simplified search + +With that out of the way, let's now see how `closest` can be implemented without +needing to track the tree's `AABB` at the root: + +```python +# Wrapper type for closest points, ordered by `distance` +@dataclasses.dataclass(order=True) +class ClosestPoint[T](NamedTuple): + point: Point = field(compare=False) + value: T = field(compare=False) + distance: float + +class KdTree[T]: + def closest(self, point: Point, n: int = 1) -> list[ClosestPoint[T]]: + assert n > 0 + res = MaxHeap() + # Instead of passing an `AABB`, we give an initial projection point, + # the query origin itself (since we haven't visited any split node yet) + self._root.closest(point, res, n, point) + return sorted(res) + +class KdNode[T]: + def closest( + self, + point: Point, + out: MaxHeap[ClosestPoint[T]], + n: int, + projection: Point, + ) -> None: + # Same implementation + self.inner.closest(point, out, n, projection) + +class KdLeafNode[T]: + def closest( + self, + point: Point, + out: MaxHeap[ClosestPoint[T]], + n: int, + projection: Point, + ) -> None: + # Same implementation + for p, val in self.points.items(): + item = ClosestPoint(p, val, dist(p, point)) + if len(out) < n: + out.push(item) + elif out.peek().distance > item.distance: + out.pushpop(item) + +class KdSplitNode[T]: + def closest( + self, + point: Point, + out: list[ClosestPoint[T]], + n: int, + projection: Point, + ) -> None: + index = self._index(point) + self.children[index].closest(point, out, n, projection) + # Project onto the splitting plane, for a minimum distance to its points + projection = projection.replace(self.axis, self.mid) + # If we're at capacity and can't possibly find any closer points, exit + if len(out) == n and dist(point, projection) > out.peek().distance: + return + # Otherwise recurse on the other side to check for nearer neighbours + self.children[1 - index].closest(point, out, n, projection) +``` + +As you can see, the main difference is in `KdSplitNode`'s implementation,
where +we can quickly compute the minimum distance between the search's origin and all +potential points in that subspace. From 23b108b2995b3a6f5ab3179b2d43df376faf9a91 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sun, 15 Sep 2024 18:20:42 +0100 Subject: [PATCH 096/107] config: fix deprecated options Unfortunately, my theme is still using the deprecated options and its code, and has been archived upstream... Maybe that will force me to move to writing my own theme in the future instead. --- config.yaml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/config.yaml b/config.yaml index b0aadf8..7dc9e41 100644 --- a/config.yaml +++ b/config.yaml @@ -6,8 +6,6 @@ theme: - "hugo-atom-feed" - "anubis" paginate: 5 -disqusShortname: "" -googleAnalytics: "" enableRobotsTXT: true enableEmoji: true @@ -69,6 +67,12 @@ params: pingback: true mathjax: true +services: + disqus: + shortname: "" + googleAnalytics: + ID: "" + taxonomies: category: "categories" tag: "tags" From af4e13d6e8a19eb30f7bd5704550e6ef928181d8 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Fri, 8 Nov 2024 16:05:24 +0000 Subject: [PATCH 097/107] treewide: fix 'serie' -> 'series' typo --- config.yaml | 2 +- i18n/en.yaml | 4 ++-- i18n/fr.yaml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/config.yaml b/config.yaml index 7dc9e41..c13a739 100644 --- a/config.yaml +++ b/config.yaml @@ -76,7 +76,7 @@ services: taxonomies: category: "categories" tag: "tags" - serie: "series" + series: "series" markup: goldmark: diff --git a/i18n/en.yaml b/i18n/en.yaml index cd75b25..1f24308 100644 --- a/i18n/en.yaml +++ b/i18n/en.yaml @@ -1,5 +1,5 @@ -serie: - other: "serie" +series: + other: "series" Series: other: "Series" diff --git a/i18n/fr.yaml b/i18n/fr.yaml index 22986f5..88dfcd3 100644 --- a/i18n/fr.yaml +++ b/i18n/fr.yaml @@ -1,4 +1,4 @@ -serie: +series: other: "série" Series: From 39944ed35db371ed050285c688c6645d05cc58f5 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI 
Date: Fri, 8 Nov 2024 16:06:19 +0000 Subject: [PATCH 098/107] posts: fix typos --- content/posts/2020-12-07-git-basics/index.md | 4 ++-- content/posts/2022-11-02-multiple-dispatch-in-c++/index.md | 2 +- content/posts/2024-07-06-gap-buffer/index.md | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/content/posts/2020-12-07-git-basics/index.md b/content/posts/2020-12-07-git-basics/index.md index 29187aa..998eaf6 100644 --- a/content/posts/2020-12-07-git-basics/index.md +++ b/content/posts/2020-12-07-git-basics/index.md @@ -134,7 +134,7 @@ branch. #### Fixup, a practical example A specific kind of squashing which I use frequently is the notion of `fixup`s. -Say you've commited a change (*A*), and later on notice that it is missing +Say you've committed a change (*A*), and later on notice that it is missing a part of the changeset. You can decide to commit that missing part (*A-bis*) and annotate it to mean that it is linked to *A*. @@ -184,7 +184,7 @@ After applying the rebase, you find yourself with the complete change inside This is especially useful when you want to apply suggestion on a merge request after it was reviewed. You can keep a clean history without those pesky `Apply -suggestion ...` commmits being part of your history. +suggestion ...` commits being part of your history. ### Lost commits and the reflog diff --git a/content/posts/2022-11-02-multiple-dispatch-in-c++/index.md b/content/posts/2022-11-02-multiple-dispatch-in-c++/index.md index 838de49..0e59968 100644 --- a/content/posts/2022-11-02-multiple-dispatch-in-c++/index.md +++ b/content/posts/2022-11-02-multiple-dispatch-in-c++/index.md @@ -261,7 +261,7 @@ this new `SpaceStation` variant at every point you `visit` the `SpaceObject`s. ## The Expression Problem -One issue we have not been able to move past in these exemples is the +One issue we have not been able to move past in these examples is the [Expression Problem][expression-problem]. 
In two words, this means that we can't add a new data type (e.g: `SpaceStation`), or a new operation (e.g: `land_on`) to our current code without re-compiling it. diff --git a/content/posts/2024-07-06-gap-buffer/index.md b/content/posts/2024-07-06-gap-buffer/index.md index b5b0b3a..0bb3d54 100644 --- a/content/posts/2024-07-06-gap-buffer/index.md +++ b/content/posts/2024-07-06-gap-buffer/index.md @@ -133,7 +133,7 @@ and moving the start of the gap further right. ```python def insert(self, val: str) -> None: - # Ensure we have enouh space to insert the whole string + # Ensure we have enough space to insert the whole string if len(val) > self.gap_length: self.grow(max(self.capacity * 2, self.string_length + len(val))) # Fill the gap with the given string @@ -163,7 +163,7 @@ def delete(self, dist: int = 1) -> None: ### Moving the cursor Moving the cursor along the buffer will shift letters from one side of the gap -to the other, moving them accross from prefix to suffix and back. +to the other, moving them across from prefix to suffix and back. I find Python's list slicing not quite as elegant to read as a `memmove`, though it does make for a very small and efficient implementation. From 87430313017eb2504e7722ac221fb0f5bc7a3d0d Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Fri, 8 Nov 2024 19:49:01 +0100 Subject: [PATCH 099/107] content: wish-lists: update thermometer links --- content/wish-lists.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/content/wish-lists.md b/content/wish-lists.md index 5e3b735..a1f9a21 100644 --- a/content/wish-lists.md +++ b/content/wish-lists.md @@ -21,5 +21,7 @@ A few of my Amazon wish lists in case you want to give me a gift. 
* Knives * [Gyuto, damascus finish, 210mm](https://www.kitchenprovisions.co.uk/collections/stainless-steel/products/gyuto-knife-vg10-damascus-finish-ohishi) * [Gyuto, polished finish, black ferrule 240mm](https://www.kitchenprovisions.co.uk/collections/sukenari/products/gyuto-knife-hap40-powder-steel-polished-finish-sukenari?variant=39743545016382) - * [Combustion Inc thermometer and display](https://combustion.inc/products/predictive-thermometer-and-display) - * [Get the one with the range extender if you *really* want to spoil me](https://combustion.inc/products/predictive-thermometer-display) + * [Combustion Inc thermometer and + display](https://combustion.inc/products/predictive-thermometer-gen2-wifi-display) + * [Get the one with the range extender if you *really* want to spoil + me](https://combustion.inc/products/predictive-thermometer-gen2-wifi-booster-wifi-display) From bba7a1e7d22b7d2b094f302ce6cd6bd92602a872 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Fri, 8 Nov 2024 19:51:02 +0100 Subject: [PATCH 100/107] content: wish-lists: remove knives I already have one fancy knife, that's enoigh (for now?). --- content/wish-lists.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/content/wish-lists.md b/content/wish-lists.md index a1f9a21..99ea727 100644 --- a/content/wish-lists.md +++ b/content/wish-lists.md @@ -18,9 +18,6 @@ A few of my Amazon wish lists in case you want to give me a gift. 
* [Chef's presses](https://www.thechefspress.com/shop) * [UK link](https://www.kitchenprovisions.co.uk/products/the-chefs-press-8oz-13oz) * [FR link](https://www.thekitchenlab.fr/fr/p/la-presse-du-chef-poids-de-friture-bruce-hill-13-oz/) - * Knives - * [Gyuto, damascus finish, 210mm](https://www.kitchenprovisions.co.uk/collections/stainless-steel/products/gyuto-knife-vg10-damascus-finish-ohishi) - * [Gyuto, polished finish, black ferrule 240mm](https://www.kitchenprovisions.co.uk/collections/sukenari/products/gyuto-knife-hap40-powder-steel-polished-finish-sukenari?variant=39743545016382) * [Combustion Inc thermometer and display](https://combustion.inc/products/predictive-thermometer-gen2-wifi-display) * [Get the one with the range extender if you *really* want to spoil From 4f7ac42b3d26e60f4ea659abca7c727df94d9257 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Wed, 13 Nov 2024 18:20:07 +0000 Subject: [PATCH 101/107] nix: simplify direnv integration --- .envrc | 7 ------- 1 file changed, 7 deletions(-) diff --git a/.envrc b/.envrc index 116d0c6..3550a30 100644 --- a/.envrc +++ b/.envrc @@ -1,8 +1 @@ -use_flake() { - watch_file flake.nix - watch_file flake.lock - eval "$(nix print-dev-env)" -} - use flake -eval "$shellHooks" From 3724ab61fc255834b93c2ab9a0bf3f345e673c26 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Sun, 24 Nov 2024 13:42:48 +0000 Subject: [PATCH 102/107] =?UTF-8?q?content:=20wish-lists:=20add=20cannel?= =?UTF-8?q?=C3=A9s=20molds?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- content/wish-lists.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/content/wish-lists.md b/content/wish-lists.md index 99ea727..e0a4548 100644 --- a/content/wish-lists.md +++ b/content/wish-lists.md @@ -22,3 +22,5 @@ A few of my Amazon wish lists in case you want to give me a gift. 
display](https://combustion.inc/products/predictive-thermometer-gen2-wifi-display) * [Get the one with the range extender if you *really* want to spoil me](https://combustion.inc/products/predictive-thermometer-gen2-wifi-booster-wifi-display) + * [Cannelés + molds](https://www.laboetgato.fr/en/moules-a-canneles/13964-mould-for-canneles-non-polished-copper-o-45-mm-3333331010026.html) From 8276c1f4e34c56e2fd18a642541fe966a99f1fb8 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Thu, 16 Jan 2025 19:57:18 -0500 Subject: [PATCH 103/107] content: about: remove 'senior' title It's mostly a funny joke with myself, as the contract stated that I was a senior engineer, even though they hired me as an L3. --- content/about.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/about.md b/content/about.md index 04d7226..3e521e9 100644 --- a/content/about.md +++ b/content/about.md @@ -4,7 +4,7 @@ description: "About me" date: 2020-07-14 --- -I'm currently working as a Senior Software Engineer at [Google][google], as part +I'm currently working as a Software Engineer at [Google][google], as part of their Embedded Graphics Drivers team for Pixel devices. [google]: https://www.linkedin.com/company/google/mycompany/verification/ From 3a36c4c44ea14b89b25572e57685f5f3cfb20080 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Thu, 16 Jan 2025 19:58:33 -0500 Subject: [PATCH 104/107] content: about: fix link to Google --- content/about.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/about.md b/content/about.md index 3e521e9..3303a55 100644 --- a/content/about.md +++ b/content/about.md @@ -7,7 +7,7 @@ date: 2020-07-14 I'm currently working as a Software Engineer at [Google][google], as part of their Embedded Graphics Drivers team for Pixel devices. -[google]: https://www.linkedin.com/company/google/mycompany/verification/ +[google]: https://www.linkedin.com/company/google/ You can find my CV [here](https://cv.belanyi.fr/en.pdf). 
Or [here](https://cv.belanyi.fr/fr.pdf) for the french version. From b7a405332c0e95a0b8f8ff35ea170cf653a9d945 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Wed, 29 Jan 2025 12:44:20 +0000 Subject: [PATCH 105/107] posts: kd-tree-revisited: fix title --- content/posts/2024-08-17-kd-tree-revisited/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/posts/2024-08-17-kd-tree-revisited/index.md b/content/posts/2024-08-17-kd-tree-revisited/index.md index be15f48..b01fa3b 100644 --- a/content/posts/2024-08-17-kd-tree-revisited/index.md +++ b/content/posts/2024-08-17-kd-tree-revisited/index.md @@ -1,5 +1,5 @@ --- -title: "Kd Tree Revisited" +title: "k-d Tree Revisited" date: 2024-08-17T14:20:22+01:00 draft: false # I don't care for draft mode, git has branches for that description: "Simplifying the nearest neighbour search" From 8ee2a234ec9d5a7da4840d31dfda05073413bb80 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Wed, 29 Jan 2025 12:45:39 +0000 Subject: [PATCH 106/107] posts: kd-tree: fix typing --- content/posts/2024-08-10-kd-tree/index.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/content/posts/2024-08-10-kd-tree/index.md b/content/posts/2024-08-10-kd-tree/index.md index 3135647..2863201 100644 --- a/content/posts/2024-08-10-kd-tree/index.md +++ b/content/posts/2024-08-10-kd-tree/index.md @@ -298,7 +298,7 @@ class AABB(NamedTuple): ) # Extend a box to contain a given point - def extend(self, point: Point) -> None: + def extend(self, point: Point) -> AABB: low = NamedTuple(*(map(min, zip(self.low, point)))) high = NamedTuple(*(map(max, zip(self.high, point)))) return AABB(low, high) @@ -392,7 +392,7 @@ class MaxHeap[T]: return heapq.heappop(self._heap).value # Pushes a value onto the heap, pops and returns the highest value - def pushpop(self, value: T) -> None: + def pushpop(self, value: T) -> T: return heapq.heappushpop(self._heap, Reverse(value)).value ``` @@ -452,7 +452,7 @@ class KdSplitNode[T]: 
def closest( self, point: Point, - out: list[ClosestPoint[T]], + out: MaxHeap[ClosestPoint[T]], n: int, bounds: AABB, ) -> None: From f15aa0567eada3de4a24b6323bc570d1e9854af8 Mon Sep 17 00:00:00 2001 From: Bruno BELANYI Date: Wed, 29 Jan 2025 12:45:39 +0000 Subject: [PATCH 107/107] posts: kd-tree-revisited: fix typing --- content/posts/2024-08-17-kd-tree-revisited/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/posts/2024-08-17-kd-tree-revisited/index.md b/content/posts/2024-08-17-kd-tree-revisited/index.md index b01fa3b..58d2a6b 100644 --- a/content/posts/2024-08-17-kd-tree-revisited/index.md +++ b/content/posts/2024-08-17-kd-tree-revisited/index.md @@ -92,7 +92,7 @@ class KdSplitNode[T]: def closest( self, point: Point, - out: list[ClosestPoint[T]], + out: MaxHeap[ClosestPoint[T]], n: int, projection: Point, ) -> None: