build: makefile: add deploy step

2021-08-08 16:23:51 +02:00
39 changed files with 122 additions and 3447 deletions
--- a/.drone.jsonnet
+++ b/.drone.jsonnet
@ -0,0 +1,64 @@
 # Build one Drone pipeline that lints, builds, deploys the site and sends a
 # notification. `isDev` selects the development variant (drafts included,
 # dev base URL, dev deploy target) over the production one.
 local Pipeline(isDev) = {
  kind: "pipeline",
  name: if isDev then "deploy-dev" else "deploy-prod",
  # Dev ignores "main", prod only triggers on "main"
  trigger: { branch: { [if isDev then "exclude" else "include"]: [ "main" ] } },
  # We want to clone the submodules, which isn't done by default
  clone: { disable: true },
  steps: [
    {
      name: "clone",
      image: "plugins/git",
      # NOTE(review): every other plugin option in this file lives under
      # `settings`; confirm that `recursive` is picked up at step level
      recursive: true,
    },
    {
      name: "markdownlint",
      image: "06kellyjac/markdownlint-cli",
      commands: [
        "markdownlint --version",
        "markdownlint content/",
      ],
    },
    {
      name: "build",
      image: "klakegg/hugo",
      commands: [
        "hugo version",
        # If dev, include drafts and future articles, change base URL
        "hugo --minify" + if isDev then " -D -F -b https://dev.belanyi.fr" else "",
      ],
      # The field is only emitted for the production pipeline
      [if !isDev then "environment"]: { HUGO_ENV: "production" }
    },
    {
      name: "deploy",
      image: "appleboy/drone-scp",
      settings: {
        source: "public/*",
        strip_components: 1, # Remove 'public/' prefix from file paths
        rm: true, # Remove previous files from target directory
        host: { from_secret: "ssh_host" },
        target: { from_secret: "ssh_target" + if isDev then "_dev" else "" },
        username: { from_secret: "ssh_user" },
        key: { from_secret: "ssh_key" },
        port: { from_secret: "ssh_port" },
      },
    },
    {
      name: "notify",
      image: "plugins/matrix",
      settings: {
        homeserver: { from_secret: "matrix_homeserver" },
        roomid: { from_secret: "matrix_roomid" },
        username: { from_secret: "matrix_username" },
        password: { from_secret: "matrix_password" },
      },
      # Step conditions are spelled `when`; `trigger` is a pipeline-level key.
      # Listing "failure" is what makes the notification run on failed builds.
      when: { status: [ "failure", "success", ] },
    },
  ]
 };
 # The generated configuration: the production pipeline (`isDev` false)
 # followed by the development one (`isDev` true).
 [
  Pipeline(false),
  Pipeline(true),
 ]
--- a/.envrc
+++ b/.envrc
@ -1 +1,8 @@
 # Load the flake's development environment into the current shell.
 # Presumably the hook that the `use flake` line below dispatches to -- TODO
 # confirm against the direnv version in use.
 use_flake() {
  # Register both flake files with `watch_file`
  watch_file flake.nix
  watch_file flake.lock
  # Evaluate the shell code printed by `nix print-dev-env`
  eval "$(nix print-dev-env)"
 }
 use flake
 # Evaluate the contents of `shellHooks` -- presumably set by the development
 # environment loaded above; TODO confirm
 eval "$shellHooks"
--- a/.markdownlint.yaml
+++ b/.markdownlint.yaml
@ -1,3 +0,0 @@
 # MD024/no-duplicate-heading/no-duplicate-header
 MD024:
  siblings_only: true
--- a/.woodpecker/deploy.yml
+++ b/.woodpecker/deploy.yml
@ -1,64 +0,0 @@
 labels:
  backend: local
 matrix:
  include:
    - TYPE: dev
      MAKE_TARGET: build-dev
      SSH_TARGET: ssh_target_dev
    - TYPE: prod
      MAKE_TARGET: build-prod
      SSH_TARGET: ssh_target
 # Run the correct matrix build on the correct branch
 when:
  evaluate: |
    ((CI_COMMIT_BRANCH == CI_REPO_DEFAULT_BRANCH) == ("${TYPE}" == "prod"))
 steps:
 - name: check
  image: bash
  commands:
  - nix flake check
 - name: build (${TYPE})
  image: bash
  commands:
  # If dev, include drafts and future articles, change base URL
  - nix develop -c make ${MAKE_TARGET}
 - name: deploy (${TYPE})
  image: bash
  environment:
    # Trailing slash to synchronize the folder's *content* to the target
    SYNC_SOURCE: public/
    SYNC_KEY:
      from_secret: ssh_key
    SYNC_PORT:
      from_secret: ssh_port
    SYNC_TARGET:
      from_secret: ${SSH_TARGET}
    SYNC_USERNAME:
      from_secret: ssh_user
    SYNC_HOST:
      from_secret: ssh_host
  commands:
  - "nix run github:ambroisie/nix-config#drone-rsync"
 - name: notify
  image: bash
  environment:
    ADDRESS:
      from_secret: matrix_homeserver
    ROOM:
      from_secret: matrix_roomid
    USER:
      from_secret: matrix_username
    PASS:
      from_secret: matrix_password
  commands:
  - nix run github:ambroisie/matrix-notifier
  when:
    status:
    - failure
    - success
--- a/7
+++ b/7
@ -3,7 +3,7 @@ all: build-dev
 .PHONY: build-dev
 build-dev:
-	HUGO_TITLE="Ambroisie's dev blog" HUGO_BASEURL=https://dev.belanyi.fr hugo -D -F
+	HUGO_BASEURL=https://dev.belanyi.fr hugo -D -F
 .PHONY: build-prod
 build-prod:
@ -13,6 +13,11 @@ build-prod:
 serve:
 	hugo server -D -F
 .PHONY: deploy
 # Synchronize the generated site in public/ to TARGET on USERNAME@SSH_HOST.
 # USERNAME, SSH_HOST and TARGET are mandatory environment variables. If KEY is
 # set, it is loaded into a temporary ssh-agent which is killed on exit.
 # The recipe is a single shell invocation on purpose: each recipe line runs in
 # its own shell, so an agent started on one line is invisible to the next.
 deploy:
 	@if [ -z "$$USERNAME" ] || [ -z "$$SSH_HOST" ] || [ -z "$$TARGET" ]; then \
 		echo "deploy: USERNAME, SSH_HOST and TARGET must be set" >&2; \
 		exit 1; \
 	fi; \
 	if [ -n "$$KEY" ]; then \
 		eval "$$(ssh-agent -s)" >/dev/null || exit 1; \
 		trap 'ssh-agent -k >/dev/null' EXIT; \
 		echo "$$KEY" | ssh-add - || exit 1; \
 	fi; \
 	rsync --progress -avz --delete public/ "$$USERNAME@$$SSH_HOST:$$TARGET"
 .PHONY: clean
 clean:
 	$(RM) -r public
--- a/archetypes/default.md
+++ b/archetypes/default.md
@ -5,18 +5,15 @@ draft: false # I don't care for draft mode, git has branches for that
 description: ""
 tags:
  - accounting
  - algorithms
  - c++
  - ci/cd
  - cli
  - data structures
  - design-pattern
  - docker
  - drone
  - git
  - hugo
  - nix
  - python
  - self-hosting
  - test
 categories:
--- a/config.yaml
+++ b/config.yaml
@ -6,6 +6,8 @@ theme:
  - "hugo-atom-feed"
  - "anubis"
 paginate: 5
 disqusShortname: ""
 googleAnalytics: ""
 enableRobotsTXT: true
 enableEmoji: true
@ -35,12 +37,11 @@ menu:
 author:
  name: "Bruno BELANYI"
  email: "contact-blog@belanyi.fr"
-  github: "ambroisie"
+  github: "Ambroisie"
-  gitlab: "ambroisie"
+  gitlab: "Ambroisie"
  sourcehut: "ambroisie"
  linkedin: "bruno-belanyi"
  matrix: "@ambroisie:belanyi.fr"
  mastodon: "nixos.paris/@ambroisie"
 permalinks:
  posts: /:year/:month/:day/:title/
@ -65,18 +66,11 @@ params:
  webmentions:
    login: belanyi.fr
    pingback: true
  mathjax: true
 services:
  disqus:
    shortname: ""
  googleAnalytics:
    ID: ""
 taxonomies:
  category: "categories"
  tag: "tags"
-  series: "series"
+  serie: "series"
 markup:
  goldmark:
--- a/content/about.md
+++ b/content/about.md
@ -4,13 +4,7 @@ description: "About me"
 date: 2020-07-14
 ---
-I'm currently working as a Senior Software Engineer at [Google][google], as part
+I'm a CS student at EPITA.
 of their Embedded Graphics Drivers team for Pixel devices.
 [google]: https://www.linkedin.com/company/google/mycompany/verification/
 You can find my CV [here](https://cv.belanyi.fr/en.pdf). Or
 [here](https://cv.belanyi.fr/fr.pdf) for the french version.
 If you are a friend or family, you might be looking for my [wish
 lists]({{< ref "wish-lists.md" >}}) to find some present ideas.
--- a/content/posts/2021-10-01-magic-conversions-in-c++/index.md
+++ b/content/posts/2021-10-01-magic-conversions-in-c++/index.md
@ -1,172 +0,0 @@
 ---
 title: "Magic Conversions in C++"
 date: 2021-10-01T14:46:14+02:00
 draft: false # I don't care for draft mode, git has branches for that
 description: "How to get the compiler to infer the correct conversion"
 tags:
  - c++
  - design-pattern
 categories:
  - programming
 series:
 favorite: false
 disable_feed: false
 ---
 One feature that I like a lot in [Rust][rust-lang] is return type polymorphism,
 best exemplified with the following snippet of code:
 ```rust
 use std::collections::HashSet;
 fn main() {
    let vec: Vec<_> = (0..10).filter(|a| a % 2 == 0).collect();
    let set: HashSet<_> = (0..10).filter(|a| a % 2 == 0).collect();
    println!("vec: {:?}", vec);
    println!("set: {:?}", set); 
 }
 ```
 We have the same expression (`(0..10).filter(|a| a % 2 == 0).collect()`) that
 results in two totally different types of values (a `Vec` and a `HashSet`)!
 This is because Rust allows you to write a function which is generic in its
 *return type*, which is a super-power that C++ does not have. But is there a way
 to emulate this behaviour with some clever code?
 [rust-lang]: https://rust-lang.org/
 <!--more-->
 ## The problem
 For the purposes of this article, the problem that I am trying to solve will be
 the following:
 ```c++
 void takes_small_array(std::array<char, 32> arr);
 void takes_big_array(std::array<char, 4096> arr);
 // How to define a `to_array` function so that the following works?
 void test(std::string_view s) {
    takes_small_array(to_array(s));
    takes_big_array(to_array(s));
 }
 ```
 ## First attempt
 If we try to solve this in a way similar to Rust, we hit a problem in what the
 language allows us to write:
 ```c++
 std::array<char, 32> to_array(std::string_view s) {
    std::array<char, 32> ret;
    std::copy(s.begin(), s.end(), ret.begin());
    return ret;
 }
 std::array<char, 4096> to_array(std::string_view s) {
    std::array<char, 4096> ret;
    std::copy(s.begin(), s.end(), ret.begin());
    return ret;
 }
 ```
 The compiler complains with the following error:
 ```none
 ambiguating new declaration of 'std::array<char, 4096> to_array(std::string_view)'
 note: old declaration 'std::array<char, 32> to_array(std::string_view)'
 ```
 That is because C++ does **not** allow you to write an overload set based on
 *return type only*.
 ## Using templates
 For our second try, we want to use *non-type template parameters* to solve the
 issue. We write the following:
 ```c++
 template <size_t N>
 std::array<char, N> to_array(std::string_view s) {
    std::array<char, N> ret;
    std::copy(s.begin(), s.end(), ret.begin());
    return ret;
 }
 ```
 The compiler does not complain when we write this! We have also solved two minor
 issues with the previous try: the sizes of the arrays are not hard-coded, and we
 kept the code DRY.
 However we have some trouble trying to use those functions as stated in the
 beginning of the problem, with the following error message:
 ```none
 error: no matching function for call to 'to_array(std::string_view&)'
      |     takes_small_array(to_array(s));
 note: candidate: 'template<size_t N> std::array<char, N> to_array(std::string_view)'
      | std::array<char, N> to_array(std::string_view s) {
 note:   template argument deduction/substitution failed:
 note:   couldn't deduce template parameter 'N'
 ```
 The compiler cannot deduce the size of the array we want to use! We could solve
 the issue by explicitly giving a size when calling the function
 (`to_array<32>(s)`) however this is unsatisfactory: we are not solving the
 problem as stated initially, which could for example lead to needless churning
 if we change the signature of `takes_small_array` to instead use
 `std::array<char, 64>`.
 Thankfully there is a way to use the compiler to our advantage, and have it
 deduce it for us, but it involves some trickery.
 ## The solution
 We want to write a function that resolves the previous two issues we
 experienced:
 * The non-type template parameter must be deduced by the end of the call to
 `to_array`, but we can only deduce it once it is being consumed by
 `takes_{small,big}_array` -- which is too late for the compiler.
 * We cannot overload on the return type, which means we must return a single
 type from the function.
 The goal is to delay *when* the deduction of the array's size is happening,
 which can be done by using a *templated conversion operator*.
 So the solution to our problem is to do the following:
 ```c++
 // Proxy returned by `to_array`: it holds on to the string and delays the
 // choice of the array size until it is converted to a concrete `std::array`.
 class ToArray {
    std::string_view s_;
 public:
    ToArray(std::string_view s) : s_(s) {}
    // Templated conversion operator: `N` is deduced from the type that the
    // `ToArray` value is being converted to
    template <size_t N>
    operator std::array<char, N>() const {
        std::array<char, N> ret;
        // No bounds check: `s_` must not be longer than `N` characters
        std::copy(s_.begin(), s_.end(), ret.begin());
        return ret;
    }
 }; // A class definition must end with a semicolon
 // Wrap `s` in the proxy, the conversion happens at the call site
 ToArray to_array(std::string_view s) {
    return ToArray{s};
 }
 ```
 The following steps happen when trying to call `takes_small_array(to_array(s))`:
 * `to_array(s)` returns a `ToArray` value.
 * the `ToArray` value is not an `array<char, 32>`, but has an implicit
 conversion operator, which the compiler invokes.
 * `takes_small_array` is called with the converted `array<char, 32>` value.
 We now have a "magic" function which can convert a `string_view` to an
 `std::array` of characters of any size. We could further improve this by
 ensuring that the array is terminated with a `'\0'`, throwing an exception when
 the array is too small for the given string, etc... This is left as an exercise
 to the reader.
--- a/content/posts/2022-11-02-multiple-dispatch-in-c++/index.md
+++ b/content/posts/2022-11-02-multiple-dispatch-in-c++/index.md
@ -1,329 +0,0 @@
 ---
 title: "Multiple Dispatch in C++"
 date: 2022-11-02T16:36:53+01:00
 draft: false # I don't care for draft mode, git has branches for that
 description: "A Lisp super-power in C++"
 tags:
  - c++
  - design-pattern
 categories:
  - programming
 series:
 favorite: false
 disable_feed: false
 ---
 A great feature that can be used in more dynamic languages is *multiple
 dispatch*. Here's an example in [Julia][julia-lang] taken from the [Wikipedia
 article][wiki-multiple-dispatch].
 ```julia
 abstract type SpaceObject end
 struct Asteroid <: SpaceObject
    # Asteroid fields
 end
 struct Spaceship <: SpaceObject
    # Spaceship fields
 end
 collide_with(::Asteroid, ::Spaceship) = # Asteroid/Spaceship collision
 collide_with(::Spaceship, ::Asteroid) = # Spaceship/Asteroid collision
 collide_with(::Spaceship, ::Spaceship) = # Spaceship/Spaceship collision
 collide_with(::Asteroid, ::Asteroid) = # Asteroid/Asteroid collision
 collide(x::SpaceObject, y::SpaceObject) = collide_with(x, y)
 ```
 The `collide` function calls `collide_with` which, at runtime, will inspect the
 types of its arguments and *dispatch* to the appropriate implementation.
 Julia was created with multiple dispatch as a first-class citizen, it is used
 liberally in its ecosystem. C++ does not have access to such a feature natively,
 but there are alternatives that I will be presenting in this article, and try to
 justify their uses and limitations.
 [julia-lang]: https://julialang.org/
 [wiki-multiple-dispatch]: https://en.wikipedia.org/wiki/Multiple_dispatch
 <!--more-->
 ## Single dispatch
 The native way to perform dynamic dispatch in C++ is through the
 use of *virtual methods*, which allows an object to *override* the behaviour of
 one of its super-classes' method.
 Invoking a virtual method will perform *single dispatch*, on the dynamic type
 of the object whose method is being called.
 Here is an example:
 ```cpp
 struct SpaceObject {
    virtual ~SpaceObject() = default;
    // Pure virtual method, which must be overridden by non-abstract sub-classes
    virtual void impact() = 0;
 };
 struct Asteroid : SpaceObject {
    // Override the method for asteroid impacts
    void impact() override {
        std::cout << "Bang!\n";
    }
 };
 struct Spaceship : SpaceObject {
    // Override the method for spaceship impacts
    void impact() override {
        std::cout << "Crash!\n";
    }
 };
 int main() {
    std::unique_ptr<SpaceObject> object = std::make_unique<Spaceship>();
    object->impact(); // Prints "Crash!"
    object = std::make_unique<Asteroid>();
    object->impact(); // Prints "Bang!"
 }
 ```
 Virtual methods are great when you want to represent a common set of behaviour
 (an *interface*), and be able to substitute various types with their specific
 implementation.
 For example, a dummy file-system interface might look like the following:
 ```cpp
 // Minimal file-system interface, to be implemented by concrete back-ends
 struct Filesystem {
    // Implementations get destroyed through a `Filesystem` pointer
    virtual ~Filesystem() = default;
    virtual void write(std::string_view filename, std::span<char> data) = 0;
    virtual std::vector<char> read(std::string_view filename) = 0;
    // Named `remove` because `delete` is a reserved keyword in C++
    virtual void remove(std::string_view filename) = 0;
 };
 ```
 You can then write `PosixFilesystem` which makes use of the POSIX API and
 interact with actual on-disk data, `MockFilesystem` which only works in-memory
 and can be used for testing, etc...
 ## Double dispatch through the Visitor pattern
 Sometimes single dispatch is not enough, such as in the collision example at the
 beginning of this article. In cases where a computation depends on the dynamic
 type of *two* of its values, we can make use of double-dispatch by leveraging
 the Visitor design pattern. This is done by calling a virtual method on the
 first value, which itself will call a virtual method on the second value.
 Here's a commented example:
 ```cpp
 struct Asteroid;
 struct Spaceship;
 struct SpaceObject {
    virtual ~SpaceObject() = default;
    // Only used to kick-start the double-dispatch process
    virtual void collide_with(SpaceObject& other) = 0;
    // The actual dispatching methods
    virtual void collide_with(Asteroid& other) = 0;
    virtual void collide_with(Spaceship& other) = 0;
 };
 struct Asteroid : SpaceObject {
    void collide_with(SpaceObject& other) override {
        // `*this` is an `Asteroid&` which kick-starts the double-dispatch
        other.collide_with(*this);
    };
    void collide_with(Asteroid& other) override { /* Asteroid/Asteroid */ };
    void collide_with(Spaceship& other) override { /* Asteroid/Spaceship */ };
 };
 struct Spaceship : SpaceObject {
    void collide_with(SpaceObject& other) override {
        // `*this` is a `Spaceship&` which kick-starts the double-dispatch
        other.collide_with(*this);
    };
    void collide_with(Asteroid& other) override { /* Spaceship/Asteroid */ };
    void collide_with(Spaceship& other) override { /* Spaceship/Spaceship */ };
 };
 void collide(SpaceObject& first, SpaceObject& second) {
    first.collide_with(second);
 };
 int main() {
    auto asteroid = std::make_unique<Asteroid>();
    auto spaceship = std::make_unique<Spaceship>();
    collide(*asteroid, *spaceship);
    // Calls in order:
    // - Asteroid::collide_with(SpaceObject&)
    // - Spaceship::collide_with(Asteroid&)
    collide(*spaceship, *asteroid);
    // Calls in order:
    // - Spaceship::collide_with(SpaceObject&)
    // - Asteroid::collide_with(Spaceship&)
    asteroid->collide_with(*spaceship);
    // Only calls Asteroid::collide_with(Spaceship&)
    spaceship->collide_with(*asteroid);
    // Only calls Spaceship::collide_with(Asteroid&)
 }
 ```
 Double dispatch is most commonly used with the *visitor pattern*, in
 which a closed class hierarchy (the data) is separated from an open class
 hierarchy (the algorithms acting on that data). This is especially useful in
 e.g: compilers, where the AST class hierarchy represents the data *only*, and
 all compiler stages and optimization passes are programmed by a series of
 visitors.
 One downside of this approach is that if you want to add `SpaceStation` as
 a sub-class of `SpaceObject`, and handle its collisions with other
 `SpaceObject`s, you need to:
 * Implement all `collide_with` methods for this new class.
 * Add a new virtual method `collide_with(SpaceStation&)` and implement it on
  every sub-class.
 This can be inconvenient if your class hierarchy changes often.
 ## Multiple dispatch on a closed class hierarchy
 When even double dispatch is not enough, there is a way to do multiple dispatch
 in standard C++, included in the STL since C++17. However unlike the previous
 methods I showed, this one relies on using [`std::variant`][variant-cppref] and
 [`std::visit`][visit-cppref].
 [variant-cppref]: https://en.cppreference.com/w/cpp/utility/variant
 [visit-cppref]: https://en.cppreference.com/w/cpp/utility/variant/visit
 The limitation of `std::variant` is that you are limited to the types you can
 select at *compile-time* for the values used during your dispatch operation.
 You have a *closed* hierarchy of classes, which is the explicit list of types in
 your `variant`.
 Nonetheless, if you can live with that limitation, then you have a great amount
 of power available to you. I have used `std::visit` in the past to mimic the
 effect of pattern matching.
 In this example, I re-create the double-dispatch from the previous section:
 ```cpp
 // No need to inherit from a `SpaceObject` base class
 struct Asteroid {};
 struct Spaceship {};
 // But the list of possible runtime types *must* be enumerated at compile-time
 using SpaceObject = std::variant<Asteroid, Spaceship>;
 void collide(SpaceObject& first, SpaceObject& second) {
    struct CollideDispatch {
        void operator()(Asteroid& first, Asteroid& second) {
            // Asteroid/Asteroid
        }
        void operator()(Asteroid& first, Spaceship& second) {
            // Asteroid/Spaceship
        }
        void operator()(Spaceship& first, Asteroid& second) {
            // Spaceship/Asteroid
        }
        void operator()(Spaceship& first, Spaceship& second) {
            // Spaceship/Spaceship
        }
    };
    std::visit(CollideDispatch(), first, second);
 }
 int main() {
    SpaceObject asteroid = Asteroid();
    SpaceObject spaceship = Spaceship();
    collide(asteroid, spaceship);
    // Calls CollideDispatch::operator()(Asteroid&, Spaceship&)
    collide(spaceship, asteroid);
    // Calls CollideDispatch::operator()(Spaceship&, Asteroid&)
 }
 ```
 Obviously, the issue with adding a new `SpaceStation` variant is once again
 apparent in this implementation. You will get a compile error unless you handle
 this new `SpaceStation` variant at every point you `visit` the `SpaceObject`s.
 ## The Expression Problem
 One issue we have not been able to move past in these examples is the
 [Expression Problem][expression-problem]. In two words, this means that we can't
 add a new data type (e.g: `SpaceStation`), or a new operation (e.g: `land_on`)
 to our current code without re-compiling it.
 [expression-problem]: https://en.wikipedia.org/wiki/Expression_problem
 This is the downside I was pointing out in our previous sections:
 * Data type extension: one can easily add a new `SpaceObject` child-class in the
  OOP version, but needs to modify each implementation if we want to add a new
  method to the `SpaceObject` interface to implement a new operation.
 * Operation extension: one can easily create a new function when using the
  `std::variant` based representation, as pattern-matching easily allows us to
  only handle the kinds of values we are interested in. But adding a new
  `SpaceObject` variant means we need to modify and re-compile every
  `std::visit` call to handle the new variant.
 There is currently no (good) way in standard C++ to tackle the Expression
 Problem. A paper ([N2216][N2216]) was written to propose a new language feature
 to improve the situation. However it looks quite complex, and never got followed
 up on for standardization.
 [N2216]: https://open-std.org/jtc1/sc22/wg21/docs/papers/2007/n2216.pdf
 In the meantime, one can find some libraries (like [`yomm2`][yomm2]) that
 reduce the amount of boiler-plate needed to emulate this feature.
 [yomm2]: https://github.com/jll63/yomm2
 ```cpp
 #include <yorel/yomm2/keywords.hpp>
 struct SpaceObject {
    virtual ~SpaceObject() = default;
 };
 struct Asteroid : SpaceObject { /* fields, methods, etc... */ };
 struct Spaceship : SpaceObject { /* fields, methods, etc... */ };
 // Register all sub-classes of `SpaceObject` for use with open methods
 register_classes(SpaceObject, Asteroid, Spaceship);
 // Register the `collide` open method, which dispatches on two arguments
 declare_method(void, collide, (virtual_<SpaceObject&>, virtual_<SpaceObject&>));
 // Write the different implementations of `collide`
 define_method(void, collide, (Asteroid& left, Asteroid& right)) { /* work */ }
 define_method(void, collide, (Asteroid& left, Spaceship& right)) { /* work */ }
 define_method(void, collide, (Spaceship& left, Asteroid& right)) { /* work */ }
 define_method(void, collide, (Spaceship& left, Spaceship& right)) { /* work */ }
 int main() {
    yorel::yomm2::update_methods();
    auto asteroid = std::make_unique<Asteroid>();
    auto spaceship = std::make_unique<Spaceship>();
    collide(*asteroid, *spaceship); // Calls (Asteroid, Spaceship) version
    collide(*spaceship, *asteroid); // Calls (Spaceship, Asteroid) version
    collide(*asteroid, *asteroid); // Calls (Asteroid, Asteroid) version
    collide(*spaceship, *spaceship); // Calls (Spaceship, Spaceship) version
 }
 ```
--- a/content/posts/2024-06-24-union-find/index.md
+++ b/content/posts/2024-06-24-union-find/index.md
@ -1,157 +0,0 @@
 ---
 title: "Union Find"
 date: 2024-06-24T21:07:49+01:00
 draft: false # I don't care for draft mode, git has branches for that
 description: "My favorite data structure"
 tags:
  - algorithms
  - data structures
  - python
 categories:
  - programming
 series:
  - Cool algorithms
 favorite: false
 disable_feed: false
 ---
 To kick off the [series]({{< ref "/series/cool-algorithms/" >}}) of posts about
 algorithms and data structures I find interesting, I will be talking about my
 favorite one: the [_Disjoint Set_][wiki]. Also known as the _Union-Find_ data
 structure, so named because of its two main operations: `ds.union(lhs, rhs)` and
 `ds.find(elem)`.
 [wiki]: https://en.wikipedia.org/wiki/Disjoint-set_data_structure
 <!--more-->
 ## What does it do?
 The _Union-Find_ data structure allows one to store a collection of sets of
 elements, with operations for adding new sets, merging two sets into one, and
 finding the representative member of a set. Not only does it do all that, but it
 does it in almost constant (amortized) time!
 Here is a small motivating example for using the _Disjoint Set_ data structure:
 ```python
 def connected_components(graph: Graph) -> list[set[Node]]:
    # Initialize the disjoint set so that each node is in its own set
    ds: DisjointSet[Node] = DisjointSet(graph.nodes)
    # Each edge is a connection, merge both sides into the same set
    for (start, dest) in graph.edges:
        ds.union(start, dest)
    # Connected components share the same (arbitrary) root
    components: dict[Node, set[Node]] = defaultdict(set)
    for n in graph.nodes:
        components[ds.find(n)].add(n)
    # Return a list of disjoint sets corresponding to each connected component
    return list(components.values())
 ```
 ## Implementation
 I will show how to implement `UnionFind` for integers, though it can easily be
 extended to be used with arbitrary types (e.g: by mapping each element
 one-to-one to a distinct integer, or using a different set representation).
 ### Representation
 Creating a new disjoint set is easy enough:
 ```python
 class UnionFind:
    """Disjoint sets over the integers `0..size-1`."""
    # `_parent[i]` is the parent of `i`, a root is its own parent
    _parent: list[int]
    # `_rank[i]` starts at 0 for every element
    _rank: list[int]
    def __init__(self, size: int):
        # Each node is in its own set, making it its own parent...
        # (the attribute must be `_parent`, the name every operation reads)
        self._parent = list(range(size))
        # ... And its rank 0
        self._rank = [0] * size
 ```
 We represent each set through the `_parent` field: each element of the set is
 linked to its parent, until the root node which is its own parent. When first
 initializing the structure, each element is in its own set, so we initialize
 each element to be a root and make it its own parent (`_parent[i] == i` for all
 `i`).
 The `_rank` field is an optimization which we will touch on in a later section.
 ### Find
 A naive implementation of `find(...)` is simple enough to write:
 ```python
 def find(self, elem: int) -> int:
    # If `elem` is its own parent, then it is the root of the tree
    if (parent := self._parent[elem]) == elem:
        return elem
    # Otherwise, recurse on the parent
    return self.find(parent)
 ```
 However, going back up the chain of parents each time we want to find the root
 node (an `O(n)` operation) would make for disastrous performance. Instead we can
 do a small optimization called _path splitting_.
 ```python
 def find(self, elem: int) -> int:
    """Return the root of the set containing `elem`, splitting the path."""
    while (parent := self._parent[elem]) != elem:
        # Replace each parent link by a link to the grand-parent.
        # Assignment targets are bound left-to-right: the link has to be
        # written *before* `elem` is rebound, otherwise the write lands on
        # `_parent[parent]` and nothing gets compressed.
        self._parent[elem], elem = self._parent[parent], parent
    return elem
 ```
 This flattens the chain so that each node links more directly to the root (the
 length is reduced by half), making each subsequent `find(...)` faster.
 Other compression schemes exist, along the spectrum between shortening the
 chain faster earlier, or updating `_parent` fewer times per `find(...)`.
 ### Union
 A naive implementation of `union(...)` is simple enough to write:
 ```python
 def union(self, lhs: int, rhs: int) -> int:
    # Replace both element by their root parent
    lhs = self.find(lhs)
    rhs = self.find(rhs)
    # arbitrarily merge one into the other
    self._parent[rhs] = lhs
    # Return the new root
    return lhs
 ```
 Once again, improvements can be made. Depending on the order in which we call
 `union(...)`, we might end up creating a long chain from the leaf of the tree to
 the root node, leading to slower `find(...)` operations. If at all possible, we
 would like to keep the trees as shallow as possible.
 To do so, we want to avoid merging taller trees into smaller ones, so as to keep
 them as balanced as possible. Since a higher tree will result in a slower
 `find(...)`, keeping the trees balanced will lead to increased performance.
 This is where the `_rank` field we mentioned earlier comes in: the _rank_ of an
 element is an upper bound on its height in the tree. By keeping track of this
 _approximate_ height, we can keep the trees balanced when merging them.
 ```python
 def union(self, lhs: int, rhs: int) -> int:
    """Merge the sets containing `lhs` and `rhs`, return the resulting root."""
    lhs = self.find(lhs)
    rhs = self.find(rhs)
    # Bail out early if they already belong to the same set
    if lhs == rhs:
        return lhs
    # Always keep `lhs` as the taller tree
    if self._rank[lhs] < self._rank[rhs]:
        lhs, rhs = rhs, lhs
    # Merge the smaller tree into the taller one
    self._parent[rhs] = lhs
    # Update the rank when merging trees of approximately the same size
    if self._rank[lhs] == self._rank[rhs]:
        self._rank[lhs] += 1
    return lhs
 ```
--- a/content/posts/2024-06-30-trie/index.md
+++ b/content/posts/2024-06-30-trie/index.md
@ -1,171 +0,0 @@
 ---
 title: "Trie"
 date: 2024-06-30T11:07:49+01:00
 draft: false # I don't care for draft mode, git has branches for that
 description: "A cool map"
 tags:
  - algorithms
  - data structures
  - python
 categories:
  - programming
 series:
  - Cool algorithms
 favorite: false
 disable_feed: false
 ---
 This time, let's talk about the [_Trie_][wiki], which is a tree-based mapping
 structure most often used for string keys.
 [wiki]: https://en.wikipedia.org/wiki/Trie
 <!--more-->
 ## What does it do?
 A _Trie_ can be used to map a set of string keys to their corresponding values,
 without the need for a hash function. This also means you won't suffer from hash
 collisions, though the tree-based structure will probably translate to slower
 performance than a good hash table.
 A _Trie_ is especially useful to represent a dictionary of words in the case of
 spell correction, as it can easily be used to fuzzy match words under a given
 edit distance (think [Levenshtein distance]).
 [Levenshtein distance]: https://en.wikipedia.org/wiki/Levenshtein_distance
 ## Implementation
 This implementation will be in Python for exposition purposes, even though
 it already has a built-in `dict`.
 ### Representation
 Creating a new `Trie` is easy: the root node starts off empty and without any
 mapped values.
 ```python
 class Trie[T]:
    _children: dict[str, Trie[T]]
    _value: T | None
    def __init__(self):
        # Each letter is mapped to a Trie
        self._children = defaultdict(Trie)
        # If we match a full string, we store the mapped value
        self._value = None
 ```
 We're using a `defaultdict` for the children for ease of implementation in this
 post. In reality, I would encourage you to exit early when you can't match a given
 character.
 The string key will be implicit by the position of a node in the tree: the empty
 string at the root, one-character strings as its direct children, etc...
 ### Search
 An exact match look-up is easily done: we go down the tree until we've exhausted
 the key. At that point we've either found a mapped value or not.
 ```python
 def get(self, key: str) -> T | None:
    """Return the value mapped to `key`, or `None` if there is none."""
    # Have we matched the full key?
    if not key:
        # Return the `T` if mapped, `None` otherwise
        return self._value
    # Otherwise, recurse on the child corresponding to the first letter
    return self._children[key[0]].get(key[1:])
 ```
 ### Insertion
 Adding a new value to the _Trie_ is similar to a key lookup, only this time we
 store the new value instead of returning it.
 ```python
 def insert(self, key: str, value: T) -> bool:
    """Map `key` to `value`.

    Returns `True` if `key` was already mapped (the previous value is
    overwritten), `False` if this is a new mapping.
    """
    # Have we matched the full key?
    if not key:
        # Check whether we're overwriting a previous mapping
        was_mapped = self._value is not None
        # Store the corresponding value
        self._value = value
        # Return whether we've performed an overwrite
        return was_mapped
    # Otherwise, recurse on the child corresponding to the first letter
    return self._children[key[0]].insert(key[1:], value)
 ```
 ### Removal
 Removal should also look familiar.
 ```python
 def remove(self, key: str) -> bool:
    """Remove the mapping for `key`, return whether there was one."""
    # Have we matched the full key?
    if not key:
        # A node is mapped if and only if it holds a value
        was_mapped = self._value is not None
        # Remove the value
        self._value = None
        # Return whether it was mapped
        return was_mapped
    # Otherwise, recurse on the child corresponding to the first letter
    return self._children[key[0]].remove(key[1:])
 ```
 ### Fuzzy matching
 Fuzzily matching a given word is where the real difficulty is: the key is to
 realize we can use the prefix-tree nature of a _Trie_ to avoid doing wasteful
 work.
 By leveraging the prefix visit order of the tree, we can build an iterative
 Levenshtein distance matrix, in much the same way one would do so in its
 [Dynamic Programming] implementation (see the [Wagner-Fischer algorithm]).
 [Dynamic Programming]: https://en.wikipedia.org/wiki/Dynamic_programming
 [Wagner-Fischer algorithm]: https://en.wikipedia.org/wiki/Wagner%E2%80%93Fischer_algorithm
 ```python
 class FuzzyResult[T](NamedTuple):
    distance: int
    key: str
    value: T
 def get_fuzzy(self, key: str, max_distance: int = 0) -> Iterator[FuzzyResult[T]]:
    def helper(
        current_word: str,
        node: Trie[T],
        previous_row: list[int],
    ) -> Iterator[tuple[int, T]]:
        # Iterative Levenshtein
        current_row = [previous_row[0] + 1]
        current_char = current_word[-1]
        for column, key_char in enumerate(key, start=1):
            insertion = current_row[column - 1] + 1
            deletion = previous_row[column] + 1
            replacement = previous_row[column - 1] + (key_char != current_char)
            current_row.append(min(insertion, deletion, replacement))
        # If we are under the max distance, match this node
        if (distance := current_row[-1]) <= max_distance and node._value != None:
            # Only if it has a value of course
            yield FuzzyResult(distance, current_word, node._value)
        # If we can potentially still match children, recurse
        if min(current_row) <= max_distance:
            for c, child in node._children.items():
                yield from helper(current_word + c, child, current_row)
    # Build the first row -- the edit distance from the empty string
    row = list(range(len(key) + 1))
    # Base case for the empty string
    if (distance := row[-1]) <= max_distance and self._value != None:
        yield FuzzyResult(distance, "", self._value)
    for c, child in self._children.items():
        yield from helper(c, child, row)
 ```
--- a/content/posts/2024-07-06-gap-buffer/index.md
+++ b/content/posts/2024-07-06-gap-buffer/index.md
@ -1,191 +0,0 @@
 ---
 title: "Gap Buffer"
 date: 2024-07-06T21:27:19+01:00
 draft: false # I don't care for draft mode, git has branches for that
 description: "As featured in GNU Emacs"
 tags:
  - algorithms
  - data structures
  - python
 categories:
  - programming
 series:
  - Cool algorithms
 favorite: false
 disable_feed: false
 ---
 The [_Gap Buffer_][wiki] is a popular data structure for text editors to
 represent files and editable buffers. The most famous of them probably being
 [GNU Emacs][emacs].
 [wiki]: https://en.wikipedia.org/wiki/Gap_buffer
 [emacs]: https://www.gnu.org/software/emacs/manual/html_node/elisp/Buffer-Gap.html
 <!--more-->
 ## What does it do?
 A _Gap Buffer_ is simply a list of characters, similar to a normal string, with
 the added twist of splitting it into two sides: the prefix and suffix, on either
 side of the cursor. In between them, a gap is left to allow for quick
 insertion at the cursor.
 Moving the cursor moves the gap around the buffer, the prefix and suffix getting
 shorter/longer as required.
 ## Implementation
 I'll be writing a sample implementation in Python, as with the rest of the
 [series]({{< ref "/series/cool-algorithms/" >}}). I don't think it showcases the
 elegance of the _Gap Buffer_ in action like a C implementation full of
 `memmove`s would, but it does make it short and sweet.
 ### Representation
 We'll be representing the gap buffer as an actual list of characters.
 Given that Python doesn't _have_ characters, let's settle for a list of strings,
 each representing a single character...
 ```python
 # Python has no character type: a `Char` is a `str` holding one character
 Char = str
 class GapBuffer:
    # Backing storage: the string's prefix, then the gap, then its suffix
    _buf: list[Char]
    # The gap covers the half-open range [start, end) (i.e: buf[start:end])
    _gap_start: int
    _gap_end: int
    # Visual representation of the gap buffer:
    # This is a very  [                     ]long string.
    # |<----------------------------------------------->| capacity
    # |<------------>|                       |<-------->| string
    #                 |<------------------->|             gap
    # |<------------>|                                    prefix
    #                                        |<-------->| suffix
    def __init__(self, initial_capacity: int = 16) -> None:
        assert initial_capacity > 0
        # A new buffer is nothing but gap: no prefix, no suffix
        self._gap_start, self._gap_end = 0, initial_capacity
        self._buf = [""] * self._gap_end
 ```
 ### Accessors
 I'm mostly adding these for exposition, and making it easier to write `assert`s
 later.
 ```python
# Number of slots in the backing list, gap included
@property
 def capacity(self) -> int:
  return len(self._buf)
# Number of slots currently covered by the gap
@property
 def gap_length(self) -> int:
  return self._gap_end - self._gap_start
# Number of slots not covered by the gap (i.e: prefix plus suffix)
@property
 def string_length(self) -> int:
  return self.capacity - self.gap_length
# Number of slots before the gap
@property
 def prefix_length(self) -> int:
  return self._gap_start
# Number of slots after the gap
@property
 def suffix_length(self) -> int:
  return self.capacity - self._gap_end
 ```
 ### Growing the buffer
 I've written this method in a somewhat non-idiomatic manner, to make it closer
 to how it would look in C using `realloc` instead.
 It would be more efficient to use slicing to insert the needed extra capacity
 directly, instead of making a new buffer and copying characters over.
 ```python
 # Reallocate the buffer to `capacity` slots, all new slots going to the gap
 def grow(self, capacity: int) -> None:
    assert capacity >= self.capacity
    # How many slots the gap is gaining
    extra = capacity - len(self._buf)
    start, end = self._gap_start, self._gap_end
    # Allocate the bigger buffer, then copy the prefix to its front and the
    # suffix to its back
    bigger = [""] * capacity
    bigger[:start] = self._buf[:start]
    bigger[end + extra :] = self._buf[end:]
    # Swap buffers, the suffix now starts `extra` slots further right
    self._buf = bigger
    self._gap_end = end + extra
 ```
 ### Insertion
 Inserting text at the cursor's position means filling up the gap in the middle
 of the buffer. To do so we must first make sure that the gap is big enough, or
 grow the buffer accordingly.
 Then inserting the text is simply a matter of copying its characters in place,
 and moving the start of the gap further right.
 ```python
 # Write `val` at the cursor, the cursor ends up right after the new text
 def insert(self, val: str) -> None:
    needed = len(val)
    # Not enough room in the gap: at least double the buffer's capacity
    if needed > self.gap_length:
        self.grow(max(self.capacity * 2, self.string_length + needed))
    # Copy the characters over the start of the gap...
    end = self._gap_start + needed
    self._buf[self._gap_start : end] = val
    # ... And shrink the gap accordingly
    self._gap_start = end
 ```
 ### Deletion
 Removing text from the buffer simply expands the gap in the corresponding
 direction, shortening the string's prefix/suffix. This makes it very cheap.
 The methods are named after the `backspace` and `delete` keys on the keyboard.
 ```python
 # Remove the `dist` characters found right before the gap
 def backspace(self, dist: int = 1) -> None:
    assert dist <= self.prefix_length
    # Extend gap to the left, the buffer's content itself is not touched
    self._gap_start -= dist
 # Remove the `dist` characters found right after the gap
 def delete(self, dist: int = 1) -> None:
    assert dist <= self.suffix_length
    # Extend gap to the right, the buffer's content itself is not touched
    self._gap_end += dist
 ```
 ### Moving the cursor
 Moving the cursor along the buffer will shift letters from one side of the gap
 to the other, moving them across from prefix to suffix and back.
 I find Python's list slicing not quite as elegant to read as a `memmove`, though
 it does make for a very small and efficient implementation.
 ```python
 # Move the gap `dist` characters towards the start of the buffer
 def left(self, dist: int = 1) -> None:
    assert dist <= self.prefix_length
    # Where the moved characters currently are, and where they must go
    src = self._gap_start - dist
    dst = self._gap_end - dist
    # Carry the end of the prefix over to the start of the suffix
    self._buf[dst : dst + dist] = self._buf[src : src + dist]
    # Both ends of the gap have moved by `dist`
    self._gap_start, self._gap_end = src, dst
 # Move the gap `dist` characters towards the end of the buffer
 def right(self, dist: int = 1) -> None:
    assert dist <= self.suffix_length
    # Where the moved characters currently are, and where they must go
    src, dst = self._gap_end, self._gap_start
    # Carry the start of the suffix over to the end of the prefix
    self._buf[dst : dst + dist] = self._buf[src : src + dist]
    # Both ends of the gap have moved by `dist`
    self._gap_start, self._gap_end = dst + dist, src + dist
 ```
--- a/content/posts/2024-07-14-bloom-filter/index.md
+++ b/content/posts/2024-07-14-bloom-filter/index.md
@ -1,97 +0,0 @@
 ---
 title: "Bloom Filter"
 date: 2024-07-14T17:46:40+01:00
 draft: false # I don't care for draft mode, git has branches for that
 description: "Probably cool"
 tags:
  - algorithms
  - data structures
  - python
 categories:
  - programming
 series:
  - Cool algorithms
 favorite: false
 disable_feed: false
 ---
 The [_Bloom Filter_][wiki] is a probabilistic data structure for set membership.
 The filter can be used as an inexpensive first step when querying the actual
 data is quite costly (e.g: as a first check for expensive cache lookups or large
 data seeks).
 [wiki]: https://en.wikipedia.org/wiki/Bloom_filter
 <!--more-->
 ## What does it do?
 A _Bloom Filter_ can be understood as a hash-set which can either tell you:
 * An element is _not_ part of the set.
 * An element _may be_ part of the set.
 More specifically, one can tweak the parameters of the filter to make it so that
 the _false positive_ rate of membership is quite low.
 I won't be going into those calculations here, but they are quite trivial to
 compute, or one can just look up appropriate values for their use case.
 ## Implementation
 I'll be using Python, which has the nifty ability of representing bitsets
 through its built-in big integers quite easily.
 We'll be assuming a `BIT_COUNT` of 64 here, but the implementation can easily be
 tweaked to use a different number, or even change it at construction time.
 ### Representation
 A `BloomFilter` is just a set of bits and a list of hash functions.
 ```python
 # Number of bits in the filter
 BIT_COUNT = 64
 class BloomFilter[T]:
    # The filter's bits, packed into a single integer
    _bits: int
    # The hash functions applied to each value
    _hash_functions: list[Callable[[T], int]]
    def __init__(self, hash_functions: list[Callable[[T], int]]) -> None:
        # Filter is initially empty
        self._bits = 0
        self._hash_functions = hash_functions
 ```
 ### Inserting a key
 To add an element to the filter, we take the output from each hash function and
 use that to set a bit in the filter. This combination of bits will identify the
 element, which we can use for lookup later.
 ```python
 # Record `val` in the filter by setting one bit per hash function
 def insert(self, val: T) -> None:
    # Iterate over each hash
    for f in self._hash_functions:
        n = f(val) % BIT_COUNT
        # Set the corresponding bit
        self._bits |= 1 << n
 ```
 ### Querying a key
 Because the _Bloom Filter_ does not actually store its elements, but some
 derived data from hashing them, it can only definitely say if an element _does
 not_ belong to it. Otherwise, it _may_ be part of the set, and should be checked
 against the actual underlying store.
 ```python
 # Return `False` if `val` was definitely never inserted, `True` if it may have been
 def may_contain(self, val: T) -> bool:
    for f in self._hash_functions:
        n = f(val) % BIT_COUNT
        # If one of the bits is unset, the value is definitely not present
        if not (self._bits & (1 << n)):
            return False
    # All bits were matched, `val` is likely to be part of the set
    return True
 ```
--- a/content/posts/2024-07-20-treap/index.md
+++ b/content/posts/2024-07-20-treap/index.md
@ -1,159 +0,0 @@
 ---
 title: "Treap"
 date: 2024-07-20T14:12:27+01:00
 draft: false # I don't care for draft mode, git has branches for that
 description: "A simpler BST"
 tags:
  - algorithms
  - data structures
  - python
 categories:
  - programming
 series:
  - Cool algorithms
 favorite: false
 disable_feed: false
 graphviz: true
 ---
 The [_Treap_][wiki] is a mix between a _Binary Search Tree_ and a _Heap_.
 Like a _Binary Search Tree_, it keeps an ordered set of keys in the shape of a
 tree, allowing for binary search traversal.
 Like a _Heap_, it associates each node with a priority, making sure that a
 parent's priority is always higher than any of its children.
 [wiki]: https://en.wikipedia.org/wiki/Treap
 <!--more-->
 ## What does it do?
 By randomizing the priority value of each key at insertion time, we ensure a
 high likelihood that the tree stays _roughly_ balanced, avoiding degenerating to
 unbalanced O(N) height.
 Here's a sample tree created by inserting integers from 0 to 250 into the tree:
 {{< graphviz file="treap.gv" />}}
 ## Implementation
 I'll be keeping the theme for this [series] by using Python to implement the
 _Treap_. This leads to somewhat annoying code to handle the rotation process,
 which is easier to do in C using pointers.
 [series]: {{< ref "/series/cool-algorithms/" >}}
 ### Representation
 Creating a new `Treap` is easy: the tree starts off empty, waiting for new nodes
 to insert.
 Each `Node` must keep track of the `key`, the mapped `value`, and the node's
 `priority` (which is assigned randomly). Finally it must also allow for storing
 two children (`left` and `right`).
 ```python
 class Node[K, V]:
    key: K
    value: V
    priority: float
    left: Node[K, V] | None
    righg: Node[K, V] | None
    def __init__(self, key: K, value: V):
        # Store key and value, like a normal BST node
        self.key = key
        self.value = value
        # Priority is derived randomly
        self.priority = random()
        self.left = None
        self.right = None
 # The tree itself, which only needs to keep hold of its root node
 class Treap[K, V]:
    # Root of the tree, `None` while the tree is empty
    _root: Node[K, V] | None
    def __init__(self):
        # The tree starts out empty
        self._root = None
 ```
 ### Search
 Searching the tree is the same as in any other _Binary Search Tree_.
 ```python
 # Return the value mapped to `key`, or `None` if the key is not in the tree
 def get(self, key: K) -> V | None:
    node = self._root
    # The usual BST traversal
    while node is not None:
        if node.key == key:
            return node.value
        elif node.key < key:
            node = node.right
        else:
            node = node.left
    return None
 ```
 ### Insertion
 To insert a new `key` into the tree, we identify which leaf position it should
 be inserted at. We then generate the node's priority, insert it at this
 position, and rotate the node upwards until the heap property is respected.
 ```python
 type ChildField = Literal["left, right"]
 def insert(self, key: K, value: V) -> bool:
    # Empty treap base-case
    if self._root is None:
        self._root = Node(key, value)
        # Signal that we're not overwriting the value
        return False
    # Keep track of the parent chain for rotation after insertion
    parents = []
    node = self._root
    while node is not None:
        # Insert a pre-existing key
        if node.key == key:
            node.value = value
            return True
        #  Go down the tree, keep track of the path through the tree
        field = "left" if key < node.key else "right"
        parents.append((node, field))
        node = getattr(node, field)
    #  Key wasn't found, we're inserting a new node
    child = Node(key, value)
    parent, field = parents[-1]
    setattr(parent, field, child)
    # Rotate the new node up until we respect the decreasing priority property
    self._rotate_up(child, parents)
    # Key wasn't found, signal that we inserted a new node
    return False
 def _rotate_up(
    self,
    node: Node[K, V],
    parents: list[tuple[Node[K, V], ChildField]],
 ) -> None:
    while parents:
        parent, field = parents.pop()
        # If the parent has higher priority, we're done rotating
        if parent.priority >= node.priority:
            break
        # Check for grand-parent/root of tree edge-case
        if parents:
            # Update grand-parent to point to the new rotated node
            grand_parent, field = parents[-1]
            setattr(grand_parent, field, node)
        else:
            # Point the root to the new rotated node
            self._root = node
        other_field = "left" if field == "right" else "right"
        # Rotate the node up
        setattr(parent, field, getattr(node, other_field))
        setattr(node, other_field, parent)
 ```
--- a/content/posts/2024-07-20-treap/treap.gv
+++ b/content/posts/2024-07-20-treap/treap.gv
--- a/content/posts/2024-07-27-treap-revisited/index.md
+++ b/content/posts/2024-07-27-treap-revisited/index.md
@ -1,146 +0,0 @@
 ---
 title: "Treap, revisited"
 date: 2024-07-27T14:12:27+01:00
 draft: false # I don't care for draft mode, git has branches for that
 description: "An even simpler BST"
 tags:
  - algorithms
  - data structures
  - python
 categories:
  - programming
 series:
  - Cool algorithms
 favorite: false
 disable_feed: false
 ---
 My [last post]({{< relref "../2024-07-20-treap/index.md" >}}) about the _Treap_
 showed an implementation using tree rotations, as is commonly done with [AVL
 Trees][avl] and [Red Black Trees][rb].
 But the _Treap_ lends itself well to a simple and elegant implementation with no
 tree rotations. This makes it especially easy to implement the removal of a key,
 rather than the fiddly process of deletion using tree rotations.
 [avl]: https://en.wikipedia.org/wiki/AVL_tree
 [rb]: https://en.wikipedia.org/wiki/Red%E2%80%93black_tree
 <!--more-->
 ## Implementation
 All operations on the tree will be implemented in terms of two fundamental
 operations: `split` and `merge`.
 We'll be reusing the same structures as in the last post, so let's skip straight
 to implementing those fundamentals, and building on them for `insert` and
 `delete`.
 ### Split
 Splitting a tree means taking a key, and getting the following output:
 * a `left` node, root of the tree of all keys lower than the input.
 * an extracted `node` which corresponds to the input `key`.
 * a `right` node, root of the tree of all keys higher than the input.
 ```python
 # A (sub-)tree, identified by its root node, `None` standing for the empty tree
 type OptionalNode[K, V] = Node[K, V] | None
 # The three pieces which `split` returns
 class SplitResult(NamedTuple):
    # Tree of all the keys lower than the split key
    left: OptionalNode
    # The node holding the split key itself, if it was in the tree
    node: OptionalNode
    # Tree of all the keys higher than the split key
    right: OptionalNode
 # Cut the tree rooted at `root` around `key`
 def split(root: OptionalNode[K, V], key: K) -> SplitResult:
    # Nothing to split in an empty tree
    if root is None:
        return SplitResult(None, None, None)
    # The root holds the key: detach both of its sub-trees and hand them back
    if root.key == key:
        result = SplitResult(root.left, root, root.right)
        root.left = root.right = None
        return result
    # The key lives on the right: the root and its left sub-tree are all lower
    if root.key < key:
        sub = split(root.right, key)
        root.right = sub.left
        return SplitResult(root, sub.node, sub.right)
    # The key lives on the left: the root and its right sub-tree are all higher
    if key < root.key:
        sub = split(root.left, key)
        root.left = sub.right
        return SplitResult(sub.left, sub.node, root)
    raise RuntimeError("Unreachable")
 ```
 ### Merge
 Merging a `left` and `right` tree means (cheaply) building a new tree containing
 both of them. A pre-condition for merging is that the `left` tree is composed
 entirely of nodes that are lower than any key in `right` (i.e: as in `left` and
 `right` after a `split`).
 ```python
 # Build a single tree out of `left` and `right`, every key in `left` being
 # lower than any key in `right`
 def merge(
    left: OptionalNode[K, V],
    right: OptionalNode[K, V],
 ) -> OptionalNode[K, V]:
    # An empty side contributes nothing, the other side is the whole result
    if left is None or right is None:
        return right if left is None else left
    # `left` wins the priority contest and becomes the root: `right` can only
    # fit somewhere within its right sub-tree
    if left.priority >= right.priority:
        merged = merge(left.right, right)
        left.right = merged
        return left
    # `right` wins the priority contest and becomes the root: `left` can only
    # fit somewhere within its left sub-tree
    if left.priority < right.priority:
        merged = merge(left, right.left)
        right.left = merged
        return right
    raise RuntimeError("Unreachable")
 ```
 ### Insertion
 Inserting a node into the tree is done in two steps:
 1. `split` the tree to isolate the middle insertion point
 2. `merge` it back up to form a full tree with the inserted key
 ```python
 # Map `key` to `value`, return whether a previous value was overwritten
 def insert(self, key: K, value: V) -> bool:
    # Isolate the key's node from all the lower (`left`) and higher (`right`) keys
    left, node, right = split(self._root, key)
    # The split hands us a node if and only if the key was already mapped
    was_updated = node is not None
    if was_updated:
        node.value = value
    else:
        node = Node(key, value)
    # Put the three pieces back together
    self._root = merge(left, merge(node, right))
    # Signal whether the key was already in the tree
    return was_updated
 ```
 ### Removal
 Removing a key from the tree is similar to inserting a new key, and forgetting
 to insert it back: simply `split` the tree and `merge` it back without the
 extracted middle node.
 ```python
 # Un-map `key`, return whether it was in the tree to begin with
 def remove(self, key: K) -> bool:
    # Pull the key's node (if there is one) out of the tree
    left, extracted, right = split(self._root, key)
    was_mapped = extracted is not None
    # Only the two outer pieces are merged back, dropping the extracted node
    self._root = merge(left, right)
    return was_mapped
 ```
--- a/content/posts/2024-08-02-reservoir-sampling/index.md
+++ b/content/posts/2024-08-02-reservoir-sampling/index.md
@ -1,145 +0,0 @@
 ---
 title: "Reservoir Sampling"
 date: 2024-08-02T18:30:56+01:00
 draft: false # I don't care for draft mode, git has branches for that
 description: "Elegantly sampling a stream"
 tags:
  - algorithms
  - python
 categories:
  - programming
 series:
  - Cool algorithms
 favorite: false
 disable_feed: false
 mathjax: true
 ---
 [_Reservoir Sampling_][reservoir] is an [online][online], probabilistic
 algorithm to uniformly sample $k$ random elements out of a stream of values.
 It's a particularly elegant and small algorithm, only requiring $\Theta(k)$
 amount of space and a single pass through the stream.
 [reservoir]: https://en.wikipedia.org/wiki/Reservoir_sampling
 [online]: https://en.wikipedia.org/wiki/Online_algorithm
 <!--more-->
 ## Sampling one element
 As an introduction, we'll first focus on fairly sampling one element from the
 stream.
 ```python
 def sample_one[T](stream: Iterable[T]) -> T:
    stream_iter = iter(stream)
    # Sample the first element
    res = next(stream_iter)
    for i, val in enumerate(stream_iter, start=1):
        j = random.randint(0, i)
        # Replace the sampled element with probability 1/(i + 1)
        if j == 0:
            res = val
    # Return the randomly sampled element
    return res
 ```
 ### Proof
 Let's now prove that this algorithm leads to a fair sampling of the stream.
 We'll be doing proof by induction.
 #### Hypothesis $H_N$
 After iterating through the first $N$ items in the stream,
 each of them has had an equal $\frac{1}{N}$ probability of being selected as
 `res`.
 #### Base Case $H_1$
 We can trivially observe that the first element is always assigned to `res`,
 $\frac{1}{1} = 1$, the hypothesis has been verified.
 #### Inductive Case
 For a given $N$, let us assume that $H_N$ holds. Let us now look at the events
 of the loop iteration where `i = N` (i.e: observation of the $N + 1$-th item in the
 stream).
 `j = random.randint(0, i)` uniformly selects a value in the range $[0, i]$,
 a.k.a $[0, N]$. We then have two cases:
 * `j == 0`, with probability $\frac{1}{N + 1}$: we select `val` as the new
 reservoir element `res`.
 * `j != 0`, with probability $\frac{N}{N + 1}$: we keep the previous value of
 `res`. By $H_N$, any of the first $N$ elements had a $\frac{1}{N}$ probability
 of being `res` at the start of the loop, so each of them now has a
 probability $\frac{1}{N} \cdot \frac{N}{N + 1} = \frac{1}{N + 1}$ of being the
 element.
 And thus, we have proven $H_{N + 1}$ at the end of the loop.
 ## Sampling $k$ elements
 The code for sampling $k$ elements is very similar to the one-element case.
 ```python
 def sample[T](stream: Iterable[T], k: int = 1) -> list[T]:
    stream_iter = iter(stream)
    # Retain the first 'k' elements in the reservoir
    res = list(itertools.islice(stream_iter, k))
    for i, val in enumerate(stream_iter, start=k):
        j = random.randint(0, i)
        # Replace one element at random with probability k/(i + 1)
        if j < k:
            res[j] = val
    # Return 'k' randomly sampled elements
    return res
 ```
 ### Proof
 Let us once again do a proof by induction, assuming the stream contains at least
 $k$ items.
 #### Hypothesis $H_N$
 After iterating through the first $N$ items in the stream, each of them has had
 an equal $\frac{k}{N}$ probability of being sampled from the stream.
 #### Base Case $H_k$
 We can trivially observe that the first $k$ elements are sampled at the start of
 the algorithm, $\frac{k}{k} = 1$, the hypothesis has been verified.
 #### Inductive Case
 For a given $N$, let us assume that $H_N$ holds. Let us now look at the events
 of the loop iteration where `i = N`, in order to prove $H_{N + 1}$.
 `j = random.randint(0, i)` uniformly selects a value in the range $[0, i]$,
 a.k.a $[0, N]$. We then have two cases:
 * `j >= k`, with probability $1 - \frac{k}{N + 1}$: we do not modify the
 sampled reservoir at all.
 * `j < k`, with probability $\frac{k}{N + 1}$: we sample the new element to
 replace the `j`-th element of the reservoir. Therefore for any element
 $e \in [0, k[$ we can either have:
  * $j = e$: the element _is_ replaced, probability $\frac{1}{k}$.
  * $j \neq e$: the element is _not_ replaced, probability $\frac{k - 1}{k}$.
 We can now compute the probability that a previously sampled element is kept in
 the reservoir:
 $1 - \frac{k}{N + 1} + \frac{k}{N + 1} \cdot \frac{k - 1}{k} = \frac{N}{N + 1}$.
 By $H_N$, any of the first $N$ elements had a $\frac{k}{N}$ probability
 of being in the reservoir at the start of the loop, so each of them now has a
 probability $\frac{k}{N} \cdot \frac{N}{N + 1} = \frac{k}{N + 1}$ of still
 being in it.
 We have now proven that all elements have a probability $\frac{k}{N + 1}$ of
 being sampled at the end of the loop, therefore $H_{N + 1}$ has been verified.
--- a/content/posts/2024-08-10-kd-tree/index.md
+++ b/content/posts/2024-08-10-kd-tree/index.md
@ -1,472 +0,0 @@
 ---
 title: "k-d Tree"
 date: 2024-08-10T11:50:33+01:00
 draft: false # I don't care for draft mode, git has branches for that
 description: "Points in spaaaaace!"
 tags:
  - algorithms
  - data structures
  - python
 categories:
  - programming
 series:
  - Cool algorithms
 favorite: false
 disable_feed: false
 ---
 The [_k-d Tree_][wiki] is a useful way to map points in space and make them
 efficient to query.
 I ran into them during my studies in graphics, as they are one of the
 possible acceleration structures for [ray-casting] operations.
 [wiki]: https://en.wikipedia.org/wiki/K-d_tree
 [ray-casting]: https://en.wikipedia.org/wiki/Ray_casting
 <!--more-->
 ## Implementation
 As usual, this will be in Python, though its lack of proper discriminated enums
 makes it more verbose than would otherwise be necessary.
 ### Pre-requisites
 Let's first define what kind of space our _k-d Tree_ is dealing with. In this
 instance $k = 3$ just like in the normal world.
 ```python
 # A point in space, one coordinate per axis
 class Point(NamedTuple):
    x: float
    y: float
    z: float
 # The axes of our space, each valued as its coordinate's index in a `Point`
 class Axis(IntEnum):
    X = 0
    Y = 1
    Z = 2
    def next(self) -> Axis:
        # Cycle through the axes (X, Y, Z, X, ...), one per level of the tree
        following = (self.value + 1) % 3
        return Axis(following)
 ```
 ### Representation
 The tree is represented by `KdTree`, each of its leaf nodes is a `KdLeafNode`
 and its inner nodes are `KdSplitNode`s.
 For each point in space, the tree can also keep track of an associated value,
 similar to a dictionary or other mapping data structure. Hence we will make our
 `KdTree` generic to this mapped type `T`.
 #### Leaf node
 A leaf node contains a number of points that were added to the tree. For each
 point, we also track their mapped value, hence the `dict[Point, T]`.
 ```python
 # A leaf of the tree, which directly stores points
 class KdLeafNode[T]:
    # Each stored point, along with its mapped value
    points: dict[Point, T]
    def __init__(self):
        # A leaf starts out empty
        self.points = {}
 ```
 #### Split node
 An inner node must partition the space into two sub-spaces along a given axis
 and mid-point (thus defining a plane). All points that are "to the left" of the
 plane will be kept in one child, while all the points "to the right" will be in
 the other. Similar to a [_Binary Search Tree_][bst]'s inner nodes.
 [bst]: https://en.wikipedia.org/wiki/Binary_search_tree
 ```python
 class KdSplitNode[T]:
    axis: Axis
    mid: float
    children: tuple[KdTreeNode[T], KdTreeNode[T]]
    # Convenience function to index into the child which contains `point`
    def _index(self, point: Point) -> int:
        return 0 if point[self.axis] <= self.mid else 1
 ```
 #### Tree
 The tree itself is merely a wrapper around its inner nodes.
 One annoying issue about writing this in Python is the lack of proper
 discriminated enum types. So we need to create a wrapper type for the nodes
 (`KdNode`) to allow for splitting when updating the tree.
 ```python
 class KdNode[T]:
    # Wrapper around leaf/inner nodes, the poor man's discriminated enum
    inner: KdLeafNode[T] | KdSplitNode[T]
    def __init__(self):
        # A new node always starts out wrapping an empty leaf
        self.inner = KdLeafNode()
    # Convenience constructor used when splitting a node
    @classmethod
    def from_items(cls, items: Iterable[tuple[Point, T]]) -> KdNode[T]:
        res = cls()
        # `res` was just created, so it wraps a leaf which we can fill directly
        res.inner.points.update(items)
        return res
 class KdTree[T]:
    # Root of the tree, always present even when the tree holds no points
    _root: KdNode[T]
    def __init__(self):
        # Tree starts out empty
        self._root = KdNode()
 ```
 ### Inserting a point
 To add a point to the tree, we simply recurse from node to node, similar to a
 _BST_'s insertion algorithm. Once we've found the correct leaf node to insert
 our point into, we simply do so.
 If that leaf node goes over the maximum number of points it can store, we must
 then split it along an axis, cycling between `X`, `Y`, and `Z` at each level of
 the tree (i.e: splitting along the `X` axis on the first level, then `Y` on the
 second, then `Z` after that, and then `X`, etc...).
 ```python
 # How many points should be stored in a leaf node before being split
 MAX_CAPACITY = 32
 # Compute the median of a non-empty series of values
 def median(values: Iterable[float]) -> float:
    sorted_values = sorted(values)
    mid_point = len(sorted_values) // 2
    # Odd number of values: the median is the one right in the middle
    if len(sorted_values) % 2 == 1:
        return sorted_values[mid_point]
    # Even number of values: take the mean of the two middle ones, which are
    # found on either side of the mid-point
    a, b = sorted_values[mid_point - 1], sorted_values[mid_point]
    return a + (b - a) / 2
 def partition[T](
    pred: Callable[[T], bool],
    iterable: Iterable[T]
 ) -> tuple[list[T], list[T]]:
    truths, falses = [], []
    for v in iterable:
        (truths if pred(v) else falses).append(v)
    return truths, falses
 def split_leaf[T](node: KdLeafNode[T], axis: Axis) -> KdSplitNode[T]:
    # Find the median value for the given axis
    mid = median(p[axis] for p in node.points)
    # Split into left/right children according to the mid-point and axis
    left, right = partition(lambda kv: kv[0][axis] <= mid, node.points.items())
    return KdSplitNode(
        split_axis,
        mid,
        (KdNode.from_items(left), KdNode.from_items(right)),
    )
 class KdTree[T]:
    # Map `point` to `val`, the result is whatever the root node reports
    def insert(self, point: Point, val: T) -> bool:
        # Forward to the root node, choose `X` as the first split axis
        return self._root.insert(point, val, Axis.X)
 class KdLeafNode[T]:
    # Store the point in this leaf, `split_axis` is not needed to do so
    def insert(self, point: Point, val: T, split_axis: Axis) -> bool:
        # Check whether we're overwriting a previous value
        was_mapped = point in self.points
        # Store the corresponding value
        self.points[point] = val
        # Return whether we've performed an overwrite
        return was_mapped
 class KdSplitNode[T]:
    # Hand the point over to whichever child it belongs to
    def insert(self, point: Point, val: T, split_axis: Axis) -> bool:
        # Find the child which contains the point
        child = self.children[self._index(point)]
        # Recurse into it, choosing the next split axis
        return child.insert(point, val, split_axis.next())
 class KdNode[T]:
    # Insert into the wrapped node, then turn it into a split node if needed
    def insert(self, point: Point, val: T, split_axis: Axis) -> bool:
        # Add the point to the wrapped node...
        res = self.inner.insert(point, val, split_axis)
        # ... And take care of splitting leaf nodes when necessary
        if (
            isinstance(self.inner, KdLeafNode)
            and len(self.inner.points) > MAX_CAPACITY
        ):
            # Swapping `inner` is what the wrapper type exists for
            self.inner = split_leaf(self.inner, split_axis)
        return res
 ```
 ### Searching for a point
 Looking for a given point in the tree looks very similar to a _BST_'s search,
 each split node dividing the space into two sub-spaces, only one of which
 contains the point.
 ```python
 class KdTree[T]:
    # Return the value mapped to `point`, as reported by the root node
    def lookup(self, point: Point) -> T | None:
        # Forward to the root node
        return self._root.lookup(point)
 class KdNode[T]:
    # Leaf and split nodes both implement `lookup`, no need to tell them apart
    def lookup(self, point: Point) -> T | None:
        # Forward to the wrapped node
        return self.inner.lookup(point)
 class KdLeafNode[T]:
    # Return the value mapped to `point`, or `None` if it isn't in this leaf
    def lookup(self, point: Point) -> T | None:
        # Simply check whether we've stored the point in this leaf
        return self.points.get(point)
 class KdSplitNode[T]:
    # Only the child on the point's side of the split is ever visited
    def lookup(self, point: Point) -> T | None:
        # Recurse into the child which contains the point
        return self.children[self._index(point)].lookup(point)
 ```
 ### Closest points
 Now to look at the most interesting operation one can do on a _k-d Tree_:
 querying for the objects which are closest to a given point (i.e: the [Nearest
 neighbour search][nns]).
 This is a more complicated algorithm, which will also need some modifications to
 the current _k-d Tree_ implementation in order to track just a bit more information
 about the points it contains.
 [nns]: https://en.wikipedia.org/wiki/Nearest_neighbor_search
 #### A notion of distance
 To search for the closest points to a given origin, we first need to define
 which [distance](https://en.wikipedia.org/wiki/Distance) we are using in our
 space.
 For this example, we'll simply be using the usual definition of [(Euclidean)
 distance][euclidean-distance].
 [euclidean-distance]: https://en.wikipedia.org/wiki/Euclidean_distance
 ```python
 # Euclidean distance between two points
 def dist(point: Point, other: Point) -> float:
    # Square root of the sum of the squared differences, coordinate by coordinate
    return sqrt(sum((a - b) ** 2 for a, b in zip(point, other)))
 ```
 #### Tracking the tree's boundaries
 To make the query efficient, we'll need to track the tree's boundaries: the
 bounding box of all points contained therein. This will allow us to stop the
 search early once we've found enough points and can be sure that the rest of the
 tree is too far away to qualify.
 For this, let's define the `AABB` (Axis-Aligned Bounding Box) class.
 ```python
 class Point(NamedTuple):
    # Convenience function to replace the coordinate along a given dimension
    def replace(self, axis: Axis, new_coord: float) -> Point:
        # Tuples are immutable, go through a list to swap the coordinate out
        coords = list(self)
        coords[axis] = new_coord
        # `Point` expects each coordinate as its own argument
        return Point(*coords)
 # An Axis-Aligned Bounding Box, defined by two opposite corners
 class AABB(NamedTuple):
    # Lowest coordinates in the box
    low: Point
    # Highest coordinates in the box
    high: Point
    # An empty box
    @classmethod
    def empty(cls) -> AABB:
        # The corners are "inside out", so that extending the box with any
        # point collapses it onto that point
        return cls(
            Point(*(float("inf"),) * 3),
            Point(*(float("-inf"),) * 3),
        )
    # Split the box into two along a given axis for a given mid-point
    def split(self, axis: Axis, mid: float) -> tuple[AABB, AABB]:
        assert self.low[axis] <= mid <= self.high[axis]
        return (
            AABB(self.low, self.high.replace(axis, mid)),
            AABB(self.low.replace(axis, mid), self.high),
        )
    # Extend a box to contain a given point, the result is a new box
    def extend(self, point: Point) -> AABB:
        # Take the lowest/highest value along each axis, coordinate by coordinate
        low = Point(*map(min, zip(self.low, point)))
        high = Point(*map(max, zip(self.high, point)))
        return AABB(low, high)
    # Return the shortest distance between a given point and the box
    def dist_to_point(self, point: Point) -> float:
        # Along each axis: how far outside of the box the point is, or zero if
        # it is within the box's bounds
        deltas = (
            max(self.low[axis] - point[axis], 0, point[axis] - self.high[axis])
            for axis in Axis
        )
        return dist(Point(0, 0, 0), Point(*deltas))
 ```
 And do the necessary modifications to the `KdTree` to store the bounding box and
 update it as we add new points.
 ```python
 class KdTree[T]:
    _root: KdNode[T]
    # New field: to keep track of the tree's boundaries
    _aabb: AABB
    def __init__(self):
        self._root = KdNode()
        # Initialize the empty tree with an empty bounding box
        self._aabb = AABB.empty()
    def insert(self, point: Point, val: T) -> bool:
        # Extend the AABB for our k-d Tree when adding a point to it
        # (`extend` returns the extended box, hence the re-assignment)
        self._aabb = self._aabb.extend(point)
        return self._root.insert(point, val, Axis.X)
 ```
 #### `MaxHeap`
 Python's builtin [`heapq`][heapq] module provides the necessary functions to
 create and interact with a [_Priority Queue_][priority-queue], in the form of a
 [_Binary Heap_][binary-heap].
 Unfortunately, Python's library maintains a _min-heap_, which keeps the minimum
 element at the root. For this algorithm, we're interested in having a
 _max-heap_, with the maximum at the root.
 Thankfully, one can just reverse the comparison function for each element to
 convert between the two. Let's write a `MaxHeap` class making use of this
 library, with a `Reverse` wrapper class to reverse the order of elements
 contained within it (similar to [Rust's `Reverse`][reverse]).
 [binary-heap]: https://en.wikipedia.org/wiki/Binary_heap
 [heapq]: https://docs.python.org/3/library/heapq.html
 [priority-queue]: https://en.wikipedia.org/wiki/Priority_queue
 [reverse]: https://doc.rust-lang.org/std/cmp/struct.Reverse.html
 ```python
 # Reverses the wrapped value's ordering
@functools.total_ordering
 class Reverse[T]:
    value: T
    def __init__(self, value: T):
        self.value = value
    # Note the flipped comparison: the wrapper with the higher value is the lower one
    def __lt__(self, other: Reverse[T]) -> bool:
        return self.value > other.value
    # Equality is left as is, `total_ordering` derives the other comparisons
    def __eq__(self, other: Reverse[T]) -> bool:
        return self.value == other.value
 class MaxHeap[T]:
    _heap: list[Reverse[T]]
    def __init__(self):
        self._heap = []
    def __len__(self) -> int:
        return len(self._heap)
    def __iter__(self) -> Iterator[T]:
        yield from (item.value for item in self._heap)
    # Push a value on the heap
    def push(self, value: T) -> None:
        heapq.heappush(self._heap, Reverse(value))
    # Peek at the current maximum value
    def peek(self) -> T:
        return self._heap[0].value
    # Pop and return the highest value
    def pop(self) -> T:
        return heapq.heappop(self._heap).value
    # Pushes a value onto the heap, pops and returns the highest value
    def pushpop(self, value: T) -> None:
        return heapq.heappushpop(self._heap, Reverse(value)).value
 ```
 #### The actual implementation
 Now that we have written the necessary building blocks, let's tackle the
 implementation of `closest` for our _k-d Tree_.
 ```python
 # Wrapper type for closest points, ordered by `distance`
@dataclasses.dataclass(order=True)
 class ClosestPoint[T](NamedTuple):
    point: Point = field(compare=False)
    value: T = field(compare=False)
    distance: float
 class KdTree[T]:
    def closest(self, point: Point, n: int = 1) -> list[ClosestPoint[T]]:
        assert n > 0
        # Create the output heap
        res = MaxHeap()
        # Recurse onto the root node
        self._root.closest(point, res, n, self._aabb)
        # Return the resulting list, from closest to farthest
        return sorted(res)
 class KdNode[T]:
    def closest(
        self,
        point: Point,
        out: MaxHeap[ClosestPoint[T]],
        n: int,
        bounds: AABB,
    ) -> None:
        # Forward to the wrapped node
        self.inner.closest(point, out, n, bounds)
 class KdLeafNode[T]:
    def closest(
        self,
        point: Point,
        out: MaxHeap[ClosestPoint[T]],
        n: int,
        bounds: AABB,
    ) -> None:
        # At the leaf, simply iterate over all points and add them to the heap
        for p, val in self.points.items():
            item = ClosestPoint(p, val, dist(p, point))
            if len(out) < n:
                # If the heap isn't full, just push
                out.push(item)
            elif out.peek().distance > item.distance:
                # Otherwise, push and pop to keep the heap at `n` elements
                out.pushpop(item)
 class KdSplitNode[T]:
    def closest(
        self,
        point: Point,
        out: list[ClosestPoint[T]],
        n: int,
        bounds: AABB,
    ) -> None:
        index = self._index(point)
        children_bounds = bounds.split(self.axis, self.mid)
        # Iterate over the child which contains the point, then its neighbour
        for i in (index, 1 - index):
            child, bounds = self.children[i], children_bounds[i]
            # `min_dist` is 0 for the first child, and the minimum distance of
            # all points contained in the second child
            min_dist = bounds.dist_to_point(point)
            # If the heap is at capacity and the child to inspect too far, stop
            if len(out) == n and min_dist > out.peek().distance:
                return
            # Otherwise, recurse
            child.closest(point, out, n, bounds)
 ```
--- a/content/posts/2024-08-17-kd-tree-revisited/index.md
+++ b/content/posts/2024-08-17-kd-tree-revisited/index.md
@ -1,112 +0,0 @@
 ---
 title: "Kd Tree Revisited"
 date: 2024-08-17T14:20:22+01:00
 draft: false # I don't care for draft mode, git has branches for that
 description: "Simplifying the nearest neighbour search"
 tags:
  - algorithms
  - data structures
  - python
 categories:
  - programming
 series:
  - Cool algorithms
 favorite: false
 disable_feed: false
 ---
 After giving it a bit of thought, I've found a way to simplify the nearest
 neighbour search (i.e: the `closest` method) for the `KdTree` I implemented in
 [my previous post]({{< relref "../2024-08-10-kd-tree/index.md" >}}).
 <!--more-->
 ## The improvement
 That post implemented the nearest neighbour search by keeping track of the
 tree's boundaries (through `AABB`), and each of its sub-trees (through
 `AABB.split`), and testing for the early exit condition by computing the
 distance of the search's origin to each sub-tree's boundaries.
 Instead of _explicitly_ keeping track of each sub-tree's boundaries, we can
 implicitly compute it when recursing down the tree.
 To compute the distance between the queried point and the splitting plane of an
 inner node, we simply need to project the origin onto that plane, which gives
 us a lower bound on the distance to the points stored on the other side.
 This can be easily computed from the `axis` and `mid` values which are stored in
 the inner nodes: to project the origin onto the plane we simply replace its
 coordinate for this axis by `mid`.
 ## Simplified search
 With that out of the way, let's now see how `closest` can be implemented without
 needing to track the tree's `AABB` at the root:
 ```python
 # Wrapper type for closest points, ordered by `distance`
@dataclasses.dataclass(order=True)
 class ClosestPoint[T](NamedTuple):
    point: Point = field(compare=False)
    value: T = field(compare=False)
    distance: float
 class KdTree[T]:
    def closest(self, point: Point, n: int = 1) -> list[ClosestPoint[T]]:
        assert n > 0
        res = MaxHeap()
        # Instead of passing an `AABB`, we give an initial projection point,
        # the query origin itself (since we haven't visited any split node yet)
        self._root.closest(point, res, n, point)
        return sorted(res)
 class KdNode[T]:
    def closest(
        self,
        point: Point,
        out: MaxHeap[ClosestPoint[T]],
        n: int,
        projection: Point,
    ) -> None:
        # Same implementation
        self.inner.closest(point, out, n, bounds)
 class KdLeafNode[T]:
    def closest(
        self,
        point: Point,
        out: MaxHeap[ClosestPoint[T]],
        n: int,
        projection: Point,
    ) -> None:
        # Same implementation
        for p, val in self.points.items():
            item = ClosestPoint(p, val, dist(p, point))
            if len(out) < n:
                out.push(item)
            elif out.peek().distance > item.distance:
                out.pushpop(item)
 class KdSplitNode[T]:
    def closest(
        self,
        point: Point,
        out: list[ClosestPoint[T]],
        n: int,
        projection: Point,
    ) -> None:
        index = self._index(point)
        self.children[index].closest(point, out, n, projection)
        # Project onto the splitting plane, for a minimum distance to its points
        projection = projection.replace(self.axis, self.mid)
        # If we're at capacity and can't possibly find any closer points, exit
        if len(out) == n and dist(point, projection) > out.peek().distance:
            return
        # Otherwise recurse on the other side to check for nearer neighbours
        self.children[1 - index].closest(point, out, n, projection)
 ```
 As you can see, the main difference is in `KdSplitNode`'s implementation, where
 we can quickly compute the minimum distance between the search's origin and all
 potential points in that subspace.
--- a/content/posts/2020-07-16-generic-flyweight-cpp/index.md
+++ b/content/posts/2020-07-16-generic-flyweight-cpp/index.md
@ -16,7 +16,7 @@ favorite: false
 The flyweight is a well-known
 [GoF](https://en.wikipedia.org/wiki/Design_Patterns) design pattern.
-Its intent is to minimize memory usage by reducing the number of instantiations
+Its intent is to minimize memory usage by reducing the number of instantiations
 of a given object.
 I will show you how to implement a robust flyweight in C++, as well as a way to
--- a/content/posts/2020-12-07-git-basics/index.md
+++ b/content/posts/2020-12-07-git-basics/index.md
@ -2,12 +2,14 @@
 title: "Git Basics"
 date: 2020-12-07 18:54:31+0100
 draft: false # I don't care for draft mode, git has branches for that
-description: "The next step after the basics"
+description: ""
 tags:
  - git 
  - cli
 categories:
  - programming
 series:
  - Git basics
 favorite: false
 ---
@ -134,7 +136,7 @@ branch.
 #### Fixup, a practical example
 A specific kind of squashing which I use frequently is the notion of `fixup`s.
-Say you've committed a change (*A*), and later on notice that it is missing
+Say you've committed a change (*A*), and later on notice that it is missing
 a part of the changeset. You can decide to commit that missing part (*A-bis*)
 and annotate it to mean that it is linked to *A*.
@ -184,7 +186,7 @@ After applying the rebase, you find yourself with the complete change inside
 This is especially useful when you want to apply suggestion on a merge request
 after it was reviewed. You can keep a clean history without those pesky `Apply
-suggestion ...` commits being part of your history.
+suggestion ...` commits being part of your history.
 ### Lost commits and the reflog
@ -315,7 +317,7 @@ easily choose which parts of your changes should end up in the same commit.
 Here's a list of commands that you should read-up on, but I won't be presenting
 further:
-* `git bisect`
+* `git bisect`
 * `git rerere`
 * `git stash`
 * and more...
--- a/content/posts/2020-07-14-hello-world/index.md
+++ b/content/posts/2020-07-14-hello-world/index.md
@ -8,8 +8,6 @@ tags:
 categories:
 favorite: false
 tikz: true
 graphviz: true
 mermaid: true
 ---
 ## Test post please ignore
@ -42,29 +40,6 @@ echo hello world | cut -d' ' -f 1
  \end{tikzpicture}
 {{% /tikz %}}
 ### Graphviz support
 {{% graphviz %}}
  graph {
    a -- b
    b -- c
    c -- a
  }
 {{% /graphviz %}}
 ### Mermaid support
 {{% mermaid %}}
  graph TD
  A[Enter Chart Definition] --> B(Preview)
  B --> C{decide}
  C --> D[Keep]
  C --> E[Edit Definition]
  E --> B
  D --> F[Save Image and Code]
  F --> B
 {{% /mermaid %}}
 ### Spoilers
 {{% spoiler "Don't open me" %}}
--- a/content/posts/2021-01-15-plaintext-accounting/index.md
+++ b/content/posts/2021-01-15-plaintext-accounting/index.md
--- a/content/posts/2020-07-22-polymorphic-flyweight-cpp/index.md
+++ b/content/posts/2020-07-22-polymorphic-flyweight-cpp/index.md
@ -14,8 +14,8 @@ favorite: false
 ---
 Coming back from our last post about [generic flyweights in C++]({{< relref
-"../2020-07-16-generic-flyweight-cpp/index.md" >}}), we can write a flyweight
+"generic-flyweight-cpp.md" >}}), we can write a flyweight that can be used with
-that can be used with any abstract base classes.
+any abstract base classes.
 <!--more-->
@ -68,7 +68,7 @@ public:
        const std::type_index lhs_i(lhs);
        const std::type_index rhs_i(rhs);
        if (lhs_i != rhs_i)
-            return lhs_i < rhs_i;
+            return lhs_i < rhs_i;
        // We are now assured that both classes have the same type
        return less_than(rhs);
    }
--- a/content/posts/2020-07-15-the-drone-ci-debacle/index.md
+++ b/content/posts/2020-07-15-the-drone-ci-debacle/index.md
--- a/content/posts/2021-02-09-the-great-nix-exode/index.md
+++ b/content/posts/2021-02-09-the-great-nix-exode/index.md
--- a/content/wish-lists.md
+++ b/content/wish-lists.md
@ -8,19 +8,7 @@ disable_feed: true
 A few of my Amazon wish lists in case you want to give me a gift.
 * [Wish list](https://www.amazon.fr/hz/wishlist/ls/1FT0IO9JJTX57)
-* ~~[Board games](https://www.amazon.fr/hz/wishlist/ls/2NY50W36THGMW)~~
+* [Board games](https://www.amazon.fr/hz/wishlist/ls/2NY50W36THGMW)
  * See the [Board Game Geek list](https://boardgamegeek.com/wishlist/Ambroisie)
  which is better curated and more up-to-date
 * [Cooking & kitchenware](https://www.amazon.fr/hz/wishlist/ls/2MNRCLPNABZSU)
 * [Data Structures & Algorithms](https://www.amazon.fr/hz/wishlist/ls/2XZPQSBOGOFC3)
 * [Programming](https://www.amazon.fr/hz/wishlist/ls/1R4KFV4H2D8IF)
 * Other items:
  * [Chef's presses](https://www.thechefspress.com/shop)
    * [UK link](https://www.kitchenprovisions.co.uk/products/the-chefs-press-8oz-13oz)
    * [FR link](https://www.thekitchenlab.fr/fr/p/la-presse-du-chef-poids-de-friture-bruce-hill-13-oz/)
  * [Combustion Inc thermometer and
  display](https://combustion.inc/products/predictive-thermometer-gen2-wifi-display)
    * [Get the one with the range extender if you *really* want to spoil
    me](https://combustion.inc/products/predictive-thermometer-gen2-wifi-booster-wifi-display)
  * [Cannelés
  molds](https://www.laboetgato.fr/en/moules-a-canneles/13964-mould-for-canneles-non-polished-copper-o-45-mm-3333331010026.html)
--- a/flake.lock
+++ b/flake.lock
@ -1,68 +1,28 @@
 {
  "nodes": {
    "flake-compat": {
      "flake": false,
      "locked": {
        "lastModified": 1696426674,
        "narHash": "sha256-kvjfFW7WAETZlt09AgDn1MrtKzP7t90Vf7vypd3OL1U=",
        "owner": "edolstra",
        "repo": "flake-compat",
        "rev": "0f9255e01c2351cc7d116c072cb317785dd33b33",
        "type": "github"
      },
      "original": {
        "owner": "edolstra",
        "repo": "flake-compat",
        "type": "github"
      }
    },
    "futils": {
      "inputs": {
        "systems": "systems"
      },
      "locked": {
-        "lastModified": 1710146030,
+        "lastModified": 1622445595,
-        "narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=",
+        "narHash": "sha256-m+JRe6Wc5OZ/mKw2bB3+Tl0ZbtyxxxfnAWln8Q5qs+Y=",
        "owner": "numtide",
        "repo": "flake-utils",
-        "rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a",
+        "rev": "7d706970d94bc5559077eb1a6600afddcd25a7c8",
        "type": "github"
      },
      "original": {
        "owner": "numtide",
-        "ref": "main",
+        "ref": "master",
        "repo": "flake-utils",
        "type": "github"
      }
    },
    "gitignore": {
      "inputs": {
        "nixpkgs": [
          "pre-commit-hooks",
          "nixpkgs"
        ]
      },
      "locked": {
        "lastModified": 1709087332,
        "narHash": "sha256-HG2cCnktfHsKV0s4XW83gU3F57gaTljL9KNSuG6bnQs=",
        "owner": "hercules-ci",
        "repo": "gitignore.nix",
        "rev": "637db329424fd7e46cf4185293b9cc8c88c95394",
        "type": "github"
      },
      "original": {
        "owner": "hercules-ci",
        "repo": "gitignore.nix",
        "type": "github"
      }
    },
    "nixpkgs": {
      "locked": {
-        "lastModified": 1722415718,
+        "lastModified": 1628320020,
-        "narHash": "sha256-5US0/pgxbMksF92k1+eOa8arJTJiPvsdZj9Dl+vJkM4=",
+        "narHash": "sha256-4xBEb+TOHyIGpK37EVsZx6dGPwNMf5YWNBJaQ4VyZws=",
        "owner": "NixOS",
        "repo": "nixpkgs",
-        "rev": "c3392ad349a5227f4a3464dce87bcc5046692fce",
+        "rev": "67c80531be622641b5b2ccc3a7aff355cb02476b",
        "type": "github"
      },
      "original": {
@ -74,21 +34,19 @@
    },
    "pre-commit-hooks": {
      "inputs": {
-        "flake-compat": "flake-compat",
+        "flake-utils": [
-        "gitignore": "gitignore",
+          "futils"
        "nixpkgs": [
          "nixpkgs"
        ],
-        "nixpkgs-stable": [
+        "nixpkgs": [
          "nixpkgs"
        ]
      },
      "locked": {
-        "lastModified": 1721042469,
+        "lastModified": 1621411868,
-        "narHash": "sha256-6FPUl7HVtvRHCCBQne7Ylp4p+dpP3P/OYuzjztZ4s70=",
+        "narHash": "sha256-R+7OQ2JYFCb3E7Jl7LhRifzMVCR6Gl8R98zYsNhZtJ8=",
        "owner": "cachix",
        "repo": "pre-commit-hooks.nix",
-        "rev": "f451c19376071a90d8c58ab1a953c6e9840527fd",
+        "rev": "2e7fac06108b4fc81f5ff9ed9a02bc4f6ede7001",
        "type": "github"
      },
      "original": {
@ -104,21 +62,6 @@
        "nixpkgs": "nixpkgs",
        "pre-commit-hooks": "pre-commit-hooks"
      }
    },
    "systems": {
      "locked": {
        "lastModified": 1681028828,
        "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
        "owner": "nix-systems",
        "repo": "default",
        "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
        "type": "github"
      },
      "original": {
        "owner": "nix-systems",
        "repo": "default",
        "type": "github"
      }
    }
  },
  "root": "root",
--- a/flake.nix
+++ b/flake.nix
@ -6,7 +6,7 @@
      type = "github";
      owner = "numtide";
      repo = "flake-utils";
-      ref = "main";
+      ref = "master";
    };
    nixpkgs = {
@ -22,8 +22,8 @@
      repo = "pre-commit-hooks.nix";
      ref = "master";
      inputs = {
        flake-utils.follows = "futils";
        nixpkgs.follows = "nixpkgs";
        nixpkgs-stable.follows = "nixpkgs";
      };
    };
  };
@ -61,17 +61,15 @@
          };
        };
-        devShells = {
+        devShell = pkgs.mkShell {
-          default = pkgs.mkShell {
+          name = "blog";
            name = "blog";
-            buildInputs = with pkgs; [
+          buildInputs = with pkgs; [
-              gnumake
+            gnumake
-              hugo
+            hugo
-            ];
+          ];
-            inherit (self.checks.${system}.pre-commit) shellHook;
+          inherit (self.checks.${system}.pre-commit) shellHook;
          };
        };
      }
    );
--- a/i18n/en.yaml
+++ b/i18n/en.yaml
@ -1,5 +1,5 @@
-series:
+serie:
-  other: "series"
+  other: "serie"
 Series:
  other: "Series"
--- a/i18n/fr.yaml
+++ b/i18n/fr.yaml
@ -1,4 +1,4 @@
-series:
+serie:
  other: "série"
 Series:
--- a/layouts/partials/footer-extra.html
+++ b/layouts/partials/footer-extra.html
@ -11,7 +11,6 @@
        <a data-hint="Sourcehut" title="Sourcehut" href="https://sr.ht/~{{ .Site.Author.sourcehut }}" target="_blank" rel="me"> Sourcehut </a>
        <a data-hint="LinkedIn" title="LinkedIn" href="https://www.linkedin.com/in/{{ .Site.Author.linkedin }}" target="_blank" rel="me"> LinkedIn </a>
        <a data-hint="Matrix" title="Matrix" href="https://matrix.to/#/{{ .Site.Author.matrix }}" target="_blank" rel="me"> Matrix </a>
        <a data-hint="Mastodon" title="Mastodon" href="https://{{ .Site.Author.mastodon }}" target="_blank" rel="me"> Mastodon </a>
        <a rel="pgpkey" href="https://key.belanyi.fr/key.pgp"> PGP </a>
        <link rel="authorization_endpoint" href="https://indieauth.com/auth">
        <p>
--- a/layouts/partials/head-extra.html
+++ b/layouts/partials/head-extra.html
@ -3,30 +3,6 @@
    <link rel="stylesheet" type="text/css" href="https://tikzjax.com/v1/fonts.css">
    <script async src="https://tikzjax.com/v1/tikzjax.js"></script>
 {{ end }}
 <!-- Graphviz support -->
 {{ if (.Params.graphviz) }}
    <script src="https://cdn.jsdelivr.net/npm/@viz-js/viz@3.7.0/lib/viz-standalone.min.js"></script>
    <script type="text/javascript">
    (function() {
        Viz.instance().then(function(viz) {
            Array.prototype.forEach.call(document.querySelectorAll("pre.graphviz"), function(x) {
                var svg = viz.renderSVGElement(x.innerText);
                // Let CSS take care of the SVG size
                svg.removeAttribute("width")
                svg.setAttribute("height", "auto")
                x.replaceChildren(svg)
            })
        })
    })();
    </script>
 {{ end }}
 <!-- Mermaid support -->
 {{ if (.Params.mermaid) }}
    <script type="module" async>
        import mermaid from "https://cdn.jsdelivr.net/npm/mermaid@latest/dist/mermaid.esm.min.mjs";
        mermaid.initialize({ startOnLoad: true });
    </script>
 {{ end }}
 {{ with .OutputFormats.Get "atom" -}}
    {{ printf `<link rel="%s" type="%s" href="%s" title="%s" />` .Rel .MediaType.Type .Permalink $.Site.Title | safeHTML }}
 {{ end -}}
--- a/layouts/shortcodes/graphviz.html
+++ b/layouts/shortcodes/graphviz.html
@ -1,16 +0,0 @@
 <pre class="graphviz">
    {{ with .Get "file" }}
        {{ if eq (. | printf "%.1s") "/" }}
            {{/* Absolute path are from root of site. */}}
            {{ $.Scratch.Set "filepath" . }}
        {{ else }}
            {{/* Relative paths are from page directory. */}}
            {{ $.Scratch.Set "filepath" $.Page.File.Dir }}
            {{ $.Scratch.Add "filepath" . }}
        {{ end }}
        {{ $.Scratch.Get "filepath" | readFile }}
    {{ else }}
        {{.Inner}}
    {{ end }}
 </pre>
--- a/layouts/shortcodes/mermaid.html
+++ b/layouts/shortcodes/mermaid.html
@ -1,16 +0,0 @@
 <pre class="mermaid">
    {{ with .Get "file" }}
        {{ if eq (. | printf "%.1s") "/" }}
            {{/* Absolute path are from root of site. */}}
            {{ $.Scratch.Set "filepath" . }}
        {{ else }}
            {{/* Relative paths are from page directory. */}}
            {{ $.Scratch.Set "filepath" $.Page.File.Dir }}
            {{ $.Scratch.Add "filepath" . }}
        {{ end }}
        {{ $.Scratch.Get "filepath" | readFile }}
    {{ else }}
        {{.Inner}}
    {{ end }}
 </pre>
--- a/layouts/shortcodes/tikz.html
+++ b/layouts/shortcodes/tikz.html
@ -1,16 +1,3 @@
 <script type="text/tikz">
-    {{ with .Get "file" }}
+    {{.Inner}}
        {{ if eq (. | printf "%.1s") "/" }}
            {{/* Absolute path are from root of site. */}}
            {{ $.Scratch.Set "filepath" . }}
        {{ else }}
            {{/* Relative paths are from page directory. */}}
            {{ $.Scratch.Set "filepath" $.Page.File.Dir }}
            {{ $.Scratch.Add "filepath" . }}
        {{ end }}
        {{ $.Scratch.Get "filepath" | readFile }}
    {{ else }}
        {{.Inner}}
    {{ end }}
 </script>
--- a/themes/anubis
+++ b/themes/anubis
@ -1 +1 @@
-Subproject commit d77e0d6d552ad135138f1f9443ef30cc5af8f0f3
+Subproject commit 5dab60e04a37896c09a32137aefe821c63b3af04
--- a/themes/hugo-atom-feed
+++ b/themes/hugo-atom-feed
@ -1 +1 @@
-Subproject commit d545effed9949bf834eaed09ad423ec3e030794f
+Subproject commit 5da913dc46d2dadcaf6548256238c58c504476de
		`@ -1 +1 @@`
			`Subproject commit d77e0d6d552ad135138f1f9443ef30cc5af8f0f3`				`Subproject commit 5dab60e04a37896c09a32137aefe821c63b3af04`
		`@ -1 +1 @@`
			`Subproject commit d545effed9949bf834eaed09ad423ec3e030794f`				`Subproject commit 5da913dc46d2dadcaf6548256238c58c504476de`