Compare commits
No commits in common. "main" and "45aebb2c816905b8c605bf7cc2e293d227a93d71" have entirely different histories.
main
...
45aebb2c81
34 changed files with 113 additions and 3265 deletions
65
.drone.jsonnet
Normal file
65
.drone.jsonnet
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
local Pipeline(isDev) = {
|
||||
kind: "pipeline",
|
||||
type: "exec",
|
||||
name: if isDev then "Deploy to dev" else "Deploy to prod",
|
||||
# Dev ignores "main", prod only triggers on "main"
|
||||
trigger: { branch: { [if isDev then "exclude" else "include"]: [ "main" ] } },
|
||||
steps: [
|
||||
{
|
||||
# We want to clone the submodules, which isn't done by default
|
||||
name: "submodules",
|
||||
commands: [
|
||||
"git submodule update --recursive --init",
|
||||
]
|
||||
},
|
||||
{
|
||||
# Include pre-commit checks, which include markdownlint
|
||||
name: "check",
|
||||
commands: [
|
||||
"nix flake check",
|
||||
],
|
||||
},
|
||||
{
|
||||
# If dev, include drafts and future articles, change base URL
|
||||
name: "build",
|
||||
commands: [
|
||||
"nix develop -c make " + if isDev then "build-dev" else "build-prod",
|
||||
],
|
||||
},
|
||||
{
|
||||
name: "deploy",
|
||||
commands: [
|
||||
"nix run github:ambroisie/nix-config#drone-scp",
|
||||
],
|
||||
environment: {
|
||||
SCP_SOURCE: "public/*",
|
||||
TAR_STRIP_COMPONENTS: 1, # Remove 'public/' prefix from file paths
|
||||
SCP_RM: true, # Remove previous files from target directory
|
||||
SCP_HOST: { from_secret: "ssh_host" },
|
||||
SCP_TARGET: { from_secret: "ssh_target" + if isDev then "_dev" else "" },
|
||||
SCP_USERNAME: { from_secret: "ssh_user" },
|
||||
SCP_KEY: { from_secret: "ssh_key" },
|
||||
SCP_PORT: { from_secret: "ssh_port" },
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "notify",
|
||||
commands: [
|
||||
"nix run github:ambroisie/matrix-notifier",
|
||||
],
|
||||
environment: {
|
||||
ADDRESS: { from_secret: "matrix_homeserver" },
|
||||
ROOM: { from_secret: "matrix_roomid" },
|
||||
USER: { from_secret: "matrix_username" },
|
||||
PASS: { from_secret: "matrix_password" },
|
||||
},
|
||||
when: { status: [ "failure", "success", ] },
|
||||
},
|
||||
]
|
||||
};
|
||||
|
||||
|
||||
[
|
||||
Pipeline(false),
|
||||
Pipeline(true),
|
||||
]
|
||||
7
.envrc
7
.envrc
|
|
@ -1 +1,8 @@
|
|||
use_flake() {
|
||||
watch_file flake.nix
|
||||
watch_file flake.lock
|
||||
eval "$(nix print-dev-env)"
|
||||
}
|
||||
|
||||
use flake
|
||||
eval "$shellHooks"
|
||||
|
|
|
|||
|
|
@ -1,3 +0,0 @@
|
|||
# MD024/no-duplicate-heading/no-duplicate-header
|
||||
MD024:
|
||||
siblings_only: true
|
||||
|
|
@ -1,64 +0,0 @@
|
|||
labels:
|
||||
backend: local
|
||||
|
||||
matrix:
|
||||
include:
|
||||
- TYPE: dev
|
||||
MAKE_TARGET: build-dev
|
||||
SSH_TARGET: ssh_target_dev
|
||||
- TYPE: prod
|
||||
MAKE_TARGET: build-prod
|
||||
SSH_TARGET: ssh_target
|
||||
|
||||
# Run the correct matrix build on the correct branch
|
||||
when:
|
||||
evaluate: |
|
||||
((CI_COMMIT_BRANCH == CI_REPO_DEFAULT_BRANCH) == ("${TYPE}" == "prod"))
|
||||
|
||||
steps:
|
||||
- name: check
|
||||
image: bash
|
||||
commands:
|
||||
- nix flake check
|
||||
|
||||
- name: build (${TYPE})
|
||||
image: bash
|
||||
commands:
|
||||
# If dev, include drafts and future articles, change base URL
|
||||
- nix develop -c make ${MAKE_TARGET}
|
||||
|
||||
- name: deploy (${TYPE})
|
||||
image: bash
|
||||
environment:
|
||||
# Trailing slash to synchronize the folder's *content* to the target
|
||||
SYNC_SOURCE: public/
|
||||
SYNC_KEY:
|
||||
from_secret: ssh_key
|
||||
SYNC_PORT:
|
||||
from_secret: ssh_port
|
||||
SYNC_TARGET:
|
||||
from_secret: ${SSH_TARGET}
|
||||
SYNC_USERNAME:
|
||||
from_secret: ssh_user
|
||||
SYNC_HOST:
|
||||
from_secret: ssh_host
|
||||
commands:
|
||||
- "nix run github:ambroisie/nix-config#drone-rsync"
|
||||
|
||||
- name: notify
|
||||
image: bash
|
||||
environment:
|
||||
ADDRESS:
|
||||
from_secret: matrix_homeserver
|
||||
ROOM:
|
||||
from_secret: matrix_roomid
|
||||
USER:
|
||||
from_secret: matrix_username
|
||||
PASS:
|
||||
from_secret: matrix_password
|
||||
commands:
|
||||
- nix run github:ambroisie/matrix-notifier
|
||||
when:
|
||||
status:
|
||||
- failure
|
||||
- success
|
||||
|
|
@ -5,18 +5,15 @@ draft: false # I don't care for draft mode, git has branches for that
|
|||
description: ""
|
||||
tags:
|
||||
- accounting
|
||||
- algorithms
|
||||
- c++
|
||||
- ci/cd
|
||||
- cli
|
||||
- data structures
|
||||
- design-pattern
|
||||
- docker
|
||||
- drone
|
||||
- git
|
||||
- hugo
|
||||
- nix
|
||||
- python
|
||||
- self-hosting
|
||||
- test
|
||||
categories:
|
||||
|
|
|
|||
16
config.yaml
16
config.yaml
|
|
@ -6,6 +6,8 @@ theme:
|
|||
- "hugo-atom-feed"
|
||||
- "anubis"
|
||||
paginate: 5
|
||||
disqusShortname: ""
|
||||
googleAnalytics: ""
|
||||
enableRobotsTXT: true
|
||||
enableEmoji: true
|
||||
|
||||
|
|
@ -35,12 +37,11 @@ menu:
|
|||
author:
|
||||
name: "Bruno BELANYI"
|
||||
email: "contact-blog@belanyi.fr"
|
||||
github: "ambroisie"
|
||||
gitlab: "ambroisie"
|
||||
github: "Ambroisie"
|
||||
gitlab: "Ambroisie"
|
||||
sourcehut: "ambroisie"
|
||||
linkedin: "bruno-belanyi"
|
||||
matrix: "@ambroisie:belanyi.fr"
|
||||
mastodon: "nixos.paris/@ambroisie"
|
||||
|
||||
permalinks:
|
||||
posts: /:year/:month/:day/:title/
|
||||
|
|
@ -65,18 +66,11 @@ params:
|
|||
webmentions:
|
||||
login: belanyi.fr
|
||||
pingback: true
|
||||
mathjax: true
|
||||
|
||||
services:
|
||||
disqus:
|
||||
shortname: ""
|
||||
googleAnalytics:
|
||||
ID: ""
|
||||
|
||||
taxonomies:
|
||||
category: "categories"
|
||||
tag: "tags"
|
||||
series: "series"
|
||||
serie: "series"
|
||||
|
||||
markup:
|
||||
goldmark:
|
||||
|
|
|
|||
|
|
@ -4,10 +4,7 @@ description: "About me"
|
|||
date: 2020-07-14
|
||||
---
|
||||
|
||||
I'm currently working as a Software Engineer at [Google][google], as part
|
||||
of their Embedded Graphics Drivers team for Pixel devices.
|
||||
|
||||
[google]: https://www.linkedin.com/company/google/
|
||||
I'm a CS student at EPITA.
|
||||
|
||||
You can find my CV [here](https://cv.belanyi.fr/en.pdf). Or
|
||||
[here](https://cv.belanyi.fr/fr.pdf) for the french version.
|
||||
|
|
|
|||
|
|
@ -8,8 +8,6 @@ tags:
|
|||
categories:
|
||||
favorite: false
|
||||
tikz: true
|
||||
graphviz: true
|
||||
mermaid: true
|
||||
---
|
||||
|
||||
## Test post please ignore
|
||||
|
|
@ -42,29 +40,6 @@ echo hello world | cut -d' ' -f 1
|
|||
\end{tikzpicture}
|
||||
{{% /tikz %}}
|
||||
|
||||
### Graphviz support
|
||||
|
||||
{{% graphviz %}}
|
||||
graph {
|
||||
a -- b
|
||||
b -- c
|
||||
c -- a
|
||||
}
|
||||
{{% /graphviz %}}
|
||||
|
||||
### Mermaid support
|
||||
|
||||
{{% mermaid %}}
|
||||
graph TD
|
||||
A[Enter Chart Definition] --> B(Preview)
|
||||
B --> C{decide}
|
||||
C --> D[Keep]
|
||||
C --> E[Edit Definition]
|
||||
E --> B
|
||||
D --> F[Save Image and Code]
|
||||
F --> B
|
||||
{{% /mermaid %}}
|
||||
|
||||
### Spoilers
|
||||
|
||||
{{% spoiler "Don't open me" %}}
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ favorite: false
|
|||
The flyweight is a well-known
|
||||
[GoF](https://en.wikipedia.org/wiki/Design_Patterns) design pattern.
|
||||
|
||||
Its intent is to minimize memory usage by reducing the number of instantiations
|
||||
It's intent is to minimize memory usage by reducing the number of instantiations
|
||||
of a given object.
|
||||
|
||||
I will show you how to implement a robust flyweight in C++, as well as a way to
|
||||
|
|
|
|||
|
|
@ -68,7 +68,7 @@ public:
|
|||
const std::type_index lhs_i(lhs);
|
||||
const std::type_index rhs_i(rhs);
|
||||
if (lhs_i != rhs_i)
|
||||
return lhs_i < rhs_i;
|
||||
returh lhs_i < rhs_i;
|
||||
// We are now assured that both classes have the same type
|
||||
return less_than(rhs);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,6 +8,8 @@ tags:
|
|||
- cli
|
||||
categories:
|
||||
- programming
|
||||
series:
|
||||
- Git basics
|
||||
favorite: false
|
||||
---
|
||||
|
||||
|
|
@ -134,7 +136,7 @@ branch.
|
|||
#### Fixup, a practical example
|
||||
|
||||
A specific kind of squashing which I use frequently is the notion of `fixup`s.
|
||||
Say you've committed a change (*A*), and later on notice that it is missing
|
||||
Say you've commited a change (*A*), and later on notice that it is missing
|
||||
a part of the changeset. You can decide to commit that missing part (*A-bis*)
|
||||
and annotate it to mean that it is linked to *A*.
|
||||
|
||||
|
|
@ -184,7 +186,7 @@ After applying the rebase, you find yourself with the complete change inside
|
|||
|
||||
This is especially useful when you want to apply suggestion on a merge request
|
||||
after it was reviewed. You can keep a clean history without those pesky `Apply
|
||||
suggestion ...` commits being part of your history.
|
||||
suggestion ...` commmits being part of your history.
|
||||
|
||||
### Lost commits and the reflog
|
||||
|
||||
|
|
@ -315,7 +317,7 @@ easily choose which parts of your changes should end up in the same commit.
|
|||
Here's a list of commands that you should read-up on, but I won't be presenting
|
||||
further:
|
||||
|
||||
* `git bisect`
|
||||
* `git bissect`
|
||||
* `git rerere`
|
||||
* `git stash`
|
||||
* and more...
|
||||
|
|
|
|||
|
|
@ -1,329 +0,0 @@
|
|||
---
|
||||
title: "Multiple Dispatch in C++"
|
||||
date: 2022-11-02T16:36:53+01:00
|
||||
draft: false # I don't care for draft mode, git has branches for that
|
||||
description: "A Lisp super-power in C++"
|
||||
tags:
|
||||
- c++
|
||||
- design-pattern
|
||||
categories:
|
||||
- programming
|
||||
series:
|
||||
favorite: false
|
||||
disable_feed: false
|
||||
---
|
||||
|
||||
A great feature that can be used in more dynamic languages is *multiple
|
||||
dispatch*. Here's an example in [Julia][julia-lang] taken from the [Wikipedia
|
||||
article][wiki-multiple-dispatch].
|
||||
|
||||
```julia
|
||||
abstract type SpaceObject end
|
||||
|
||||
struct Asteroid <: SpaceObject
|
||||
# Asteroid fields
|
||||
end
|
||||
struct Spaceship <: SpaceObject
|
||||
# Spaceship fields
|
||||
end
|
||||
|
||||
collide_with(::Asteroid, ::Spaceship) = # Asteroid/Spaceship collision
|
||||
collide_with(::Spaceship, ::Asteroid) = # Spaceship/Asteroid collision
|
||||
collide_with(::Spaceship, ::Spaceship) = # Spaceship/Spaceship collision
|
||||
collide_with(::Asteroid, ::Asteroid) = # Asteroid/Asteroid collision
|
||||
|
||||
collide(x::SpaceObject, y::SpaceObject) = collide_with(x, y)
|
||||
```
|
||||
|
||||
The `collide` function calls `collide_with` which, at runtime, will inspect the
|
||||
types of its arguments and *dispatch* to the appropriate implementation.
|
||||
|
||||
Julia was created with multiple dispatch as a first-class citizen, it is used
|
||||
liberally in its ecosystem. C++ does not have access to such a feature natively,
|
||||
but there are alternatives that I will be presenting in this article, and try to
|
||||
justify their uses and limitations.
|
||||
|
||||
[julia-lang]: https://julialang.org/
|
||||
[wiki-multiple-dispatch]: https://en.wikipedia.org/wiki/Multiple_dispatch
|
||||
<!--more-->
|
||||
|
||||
## Single dispatch
|
||||
|
||||
The native way to perform dynamic dispatch in C++ is through the
|
||||
use of *virtual methods*, which allows an object to *override* the behaviour of
|
||||
one of its super-classes' method.
|
||||
|
||||
Invoking a virtual method will perform *single dispatch*, on the dynamic type
|
||||
of the object whose method is being called.
|
||||
|
||||
Here is an example:
|
||||
|
||||
```cpp
|
||||
struct SpaceObject {
|
||||
virtual ~SpaceObject() = default;
|
||||
|
||||
// Pure virtual method, which must be overridden by non-abstract sub-classes
|
||||
virtual void impact() = 0;
|
||||
};
|
||||
|
||||
struct Asteroid : SpaceObject {
|
||||
// Override the method for asteroid impacts
|
||||
void impact() override {
|
||||
std::cout << "Bang!\n";
|
||||
}
|
||||
};
|
||||
|
||||
struct Spaceship : SpaceObject {
|
||||
// Override the method for spaceship impacts
|
||||
void impact() override {
|
||||
std::cout << "Crash!\n";
|
||||
}
|
||||
};
|
||||
|
||||
int main() {
|
||||
std::unique_ptr<SpaceObject> object = std::make_unique<Spaceship>();
|
||||
object->impact(); // Prints "Crash!"
|
||||
|
||||
object = std::make_unique<Asteroid>();
|
||||
object->impact(); // Prints "Bang!"
|
||||
}
|
||||
```
|
||||
|
||||
Virtual methods are great when you want to represent a common set of behaviour
|
||||
(an *interface*), and be able to substitute various types with their specific
|
||||
implementation.
|
||||
|
||||
For example, a dummy file-system interface might look like the following:
|
||||
|
||||
```cpp
|
||||
struct Filesystem {
|
||||
virtual void write(std::string_view filename, std::span<char> data) = 0;
|
||||
virtual std::vector<char> read(std::string_view filename) = 0;
|
||||
virtual void delete(std::string_view filename) = 0;
|
||||
};
|
||||
```
|
||||
|
||||
You can then write `PosixFilesystem` which makes use of the POSIX API and
|
||||
interacts with actual on-disk data, `MockFilesystem` which only works in-memory
|
||||
and can be used for testing, etc...
|
||||
|
||||
## Double dispatch through the Visitor pattern
|
||||
|
||||
Sometimes single dispatch is not enough, such as in the collision example at the
|
||||
beginning of this article. In cases where a computation depends on the dynamic
|
||||
type of *two* of its values, we can make use of double-dispatch by leveraging
|
||||
the Visitor design pattern. This is done by calling a virtual method on the
|
||||
first value, which itself will call a virtual method on the second value.
|
||||
|
||||
Here's a commentated example:
|
||||
|
||||
```cpp
|
||||
struct Asteroid;
|
||||
struct Spaceship;
|
||||
|
||||
struct SpaceObject {
|
||||
virtual ~SpaceObject() = default;
|
||||
|
||||
// Only used to kick-start the double-dispatch process
|
||||
virtual void collide_with(SpaceObject& other) = 0;
|
||||
|
||||
// The actual dispatching methods
|
||||
virtual void collide_with(Asteroid& other) = 0;
|
||||
virtual void collide_with(Spaceship& other) = 0;
|
||||
};
|
||||
|
||||
struct Asteroid : SpaceObject {
|
||||
void collide_with(SpaceObject& other) override {
|
||||
// `*this` is an `Asteroid&` which kick-starts the double-dispatch
|
||||
other.collide_with(*this);
|
||||
};
|
||||
|
||||
void collide_with(Asteroid& other) override { /* Asteroid/Asteroid */ };
|
||||
void collide_with(Spaceship& other) override { /* Asteroid/Spaceship */ };
|
||||
};
|
||||
|
||||
struct Spaceship : SpaceObject {
|
||||
void collide_with(SpaceObject& other) override {
|
||||
// `*this` is a `Spaceship&` which kick-starts the double-dispatch
|
||||
other.collide_with(*this);
|
||||
};
|
||||
|
||||
void collide_with(Asteroid& other) override { /* Spaceship/Asteroid */ };
|
||||
void collide_with(Spaceship& other) override { /* Spaceship/Spaceship */ };
|
||||
};
|
||||
|
||||
void collide(SpaceObject& first, SpaceObject& second) {
|
||||
first.collide_with(second);
|
||||
};
|
||||
|
||||
int main() {
|
||||
auto asteroid = std::make_unique<Asteroid>();
|
||||
auto spaceship = std::make_unique<Spaceship>();
|
||||
|
||||
collide(*asteroid, *spaceship);
|
||||
// Calls in order:
|
||||
// - Asteroid::collide_with(SpaceObject&)
|
||||
// - Spaceship::collide_with(Asteroid&)
|
||||
|
||||
collide(*spaceship, *asteroid);
|
||||
// Calls in order:
|
||||
// - Spaceship::collide_with(SpaceObject&)
|
||||
// - Asteroid::collide_with(Spaceship&)
|
||||
|
||||
asteroid->collide_with(*spaceship);
|
||||
// Only calls Asteroid::collide_with(Spaceship&)
|
||||
|
||||
spaceship->collide_with(*asteroid);
|
||||
// Only calls Spaceship::collide_with(Asteroid&)
|
||||
}
|
||||
```
|
||||
|
||||
Double dispatch is most commonly used with the *visitor pattern*, in
|
||||
which a closed class hierarchy (the data) is separated from an open class
|
||||
hierarchy (the algorithms acting on that data). This is especially useful in
|
||||
e.g: compilers, where the AST class hierarchy represents the data *only*, and
|
||||
all compiler stages and optimization passes are programmed by a series of
|
||||
visitors.
|
||||
|
||||
One downside of this approach is that if you want to add `SpaceStation` as
|
||||
a sub-class of `SpaceObject`, and handle its collisions with other
|
||||
`SpaceObject`s, you need to:
|
||||
|
||||
* Implement all `collide_with` methods for this new class.
|
||||
* Add a new virtual method `collide_with(SpaceStation&)` and implement it on
|
||||
every sub-class.
|
||||
|
||||
This can be inconvenient if your class hierarchy changes often.
|
||||
|
||||
## Multiple dispatch on a closed class hierarchy
|
||||
|
||||
When even double dispatch is not enough, there is a way to do multiple dispatch
|
||||
in standard C++, included in the STL since C++17. However unlike the previous
|
||||
methods I showed, this one relies on using [`std::variant`][variant-cppref] and
|
||||
[`std::visit`][visit-cppref].
|
||||
|
||||
[variant-cppref]: https://en.cppreference.com/w/cpp/utility/variant
|
||||
[visit-cppref]: https://en.cppreference.com/w/cpp/utility/variant/visit
|
||||
|
||||
The limitation of `std::variant` is that you are limited to the types you can
|
||||
select at *compile-time* for the values used during your dispatch operation.
|
||||
You have a *closed* hierarchy of classes, which is the explicit list of types in
|
||||
your `variant`.
|
||||
|
||||
Nonetheless, if you can live with that limitation, then you have a great amount
|
||||
of power available to you. I have used `std::visit` in the past to mimic the
|
||||
effect of pattern matching.
|
||||
|
||||
In this example, I re-create the double-dispatch from the previous section:
|
||||
|
||||
```cpp
|
||||
// No need to inherit from a `SpaceObject` base class
|
||||
struct Asteroid {};
|
||||
struct Spaceship {};
|
||||
|
||||
// But the list of possible runtime types *must* be enumerated at compile-time
|
||||
using SpaceObject = std::variant<Asteroid, Spaceship>;
|
||||
|
||||
void collide(SpaceObject& first, SpaceObject& second) {
|
||||
struct CollideDispatch {
|
||||
void operator()(Asteroid& first, Asteroid& second) {
|
||||
// Asteroid/Asteroid
|
||||
}
|
||||
void operator()(Asteroid& first, Spaceship& second) {
|
||||
// Asteroid/Spaceship
|
||||
}
|
||||
void operator()(Spaceship& first, Asteroid& second) {
|
||||
// Spaceship/Asteroid
|
||||
}
|
||||
void operator()(Spaceship& first, Spaceship& second) {
|
||||
// Spaceship/Spaceship
|
||||
}
|
||||
};
|
||||
|
||||
std::visit(CollideDispatch(), first, second);
|
||||
}
|
||||
|
||||
int main() {
|
||||
SpaceObject asteroid = Asteroid();
|
||||
SpaceObject spaceship = Spaceship();
|
||||
|
||||
collide(asteroid, spaceship);
|
||||
// Calls CollideDispatch::operator()(Asteroid&, Spaceship&)
|
||||
|
||||
collide(spaceship, asteroid);
|
||||
// Calls CollideDispatch::operator()(Spaceship&, Asteroid&)
|
||||
}
|
||||
```
|
||||
|
||||
Obviously, the issue with adding a new `SpaceStation` variant is once again
|
||||
apparent in this implementation. You will get a compile error unless you handle
|
||||
this new `SpaceStation` variant at every point you `visit` the `SpaceObject`s.
|
||||
|
||||
## The Expression Problem
|
||||
|
||||
One issue we have not been able to move past in these examples is the
|
||||
[Expression Problem][expression-problem]. In two words, this means that we can't
|
||||
add a new data type (e.g: `SpaceStation`), or a new operation (e.g: `land_on`)
|
||||
to our current code without re-compiling it.
|
||||
|
||||
[expression-problem]: https://en.wikipedia.org/wiki/Expression_problem
|
||||
|
||||
This is the downside I was pointing out in our previous sections:
|
||||
|
||||
* Data type extension: one can easily add a new `SpaceObject` child-class in the
|
||||
OOP version, but needs to modify each implementation if we want to add a new
|
||||
method to the `SpaceObject` interface to implement a new operation.
|
||||
* Operation extension: one can easily create a new function when using the
|
||||
`std::variant` based representation, as pattern-matching easily allows us to
|
||||
only handle the kinds of values we are interested in. But adding a new
|
||||
`SpaceObject` variant means we need to modify and re-compile every
|
||||
`std::visit` call to handle the new variant.
|
||||
|
||||
There is currently no (good) way in standard C++ to tackle the Expression
|
||||
Problem. A paper ([N2216][N2216]) was written to propose a new language feature
|
||||
to improve the situation. However it looks quite complex, and never got followed
|
||||
up on for standardization.
|
||||
|
||||
[N2216]: https://open-std.org/jtc1/sc22/wg21/docs/papers/2007/n2216.pdf
|
||||
|
||||
In the meantime, one can find some libraries (like [`yomm2`][yomm2]) that
|
||||
reduce the amount of boiler-plate needed to emulate this feature.
|
||||
|
||||
[yomm2]: https://github.com/jll63/yomm2
|
||||
|
||||
```cpp
|
||||
#include <yorel/yomm2/keywords.hpp>
|
||||
|
||||
struct SpaceObject {
|
||||
virtual ~SpaceObject() = default;
|
||||
};
|
||||
|
||||
struct Asteroid : SpaceObject { /* fields, methods, etc... */ };
|
||||
|
||||
struct Spaceship : SpaceObject { /* fields, methods, etc... */ };
|
||||
|
||||
// Register all sub-classes of `SpaceObject` for use with open methods
|
||||
register_classes(SpaceObject, Asteroid, Spaceship);
|
||||
|
||||
// Register the `collide` open method, which dispatches on two arguments
|
||||
declare_method(void, collide, (virtual_<SpaceObject&>, virtual_<SpaceObject&>));
|
||||
|
||||
// Write the different implementations of `collide`
|
||||
define_method(void, collide, (Asteroid& left, Asteroid& right)) { /* work */ }
|
||||
define_method(void, collide, (Asteroid& left, Spaceship& right)) { /* work */ }
|
||||
define_method(void, collide, (Spaceship& left, Asteroid& right)) { /* work */ }
|
||||
define_method(void, collide, (Spaceship& left, Spaceship& right)) { /* work */ }
|
||||
|
||||
|
||||
int main() {
|
||||
yorel::yomm2::update_methods();
|
||||
|
||||
auto asteroid = std::make_unique<Asteroid>();
|
||||
auto spaceship = std::make_unique<Spaceship>();
|
||||
|
||||
collide(*asteroid, *spaceship); // Calls (Asteroid, Spaceship) version
|
||||
collide(*spaceship, *asteroid); // Calls (Spaceship, Asteroid) version
|
||||
collide(*asteroid, *asteroid); // Calls (Asteroid, Asteroid) version
|
||||
collide(*spaceship, *spaceship); // Calls (Spaceship, Spaceship) version
|
||||
}
|
||||
```
|
||||
|
|
@ -1,157 +0,0 @@
|
|||
---
|
||||
title: "Union Find"
|
||||
date: 2024-06-24T21:07:49+01:00
|
||||
draft: false # I don't care for draft mode, git has branches for that
|
||||
description: "My favorite data structure"
|
||||
tags:
|
||||
- algorithms
|
||||
- data structures
|
||||
- python
|
||||
categories:
|
||||
- programming
|
||||
series:
|
||||
- Cool algorithms
|
||||
favorite: false
|
||||
disable_feed: false
|
||||
---
|
||||
|
||||
To kick off the [series]({{< ref "/series/cool-algorithms/" >}}) of posts about
|
||||
algorithms and data structures I find interesting, I will be talking about my
|
||||
favorite one: the [_Disjoint Set_][wiki]. Also known as the _Union-Find_ data
|
||||
structure, so named because of its two main operations: `ds.union(lhs, rhs)` and
|
||||
`ds.find(elem)`.
|
||||
|
||||
[wiki]: https://en.wikipedia.org/wiki/Disjoint-set_data_structure
|
||||
|
||||
<!--more-->
|
||||
|
||||
## What does it do?
|
||||
|
||||
The _Union-Find_ data structure allows one to store a collection of sets of
|
||||
elements, with operations for adding new sets, merging two sets into one, and
|
||||
finding the representative member of a set. Not only does it do all that, but it
|
||||
does it in almost constant (amortized) time!
|
||||
|
||||
Here is a small motivating example for using the _Disjoint Set_ data structure:
|
||||
|
||||
```python
|
||||
def connected_components(graph: Graph) -> list[set[Node]]:
|
||||
# Initialize the disjoint set so that each node is in its own set
|
||||
ds: DisjointSet[Node] = DisjointSet(graph.nodes)
|
||||
# Each edge is a connection, merge both sides into the same set
|
||||
for (start, dest) in graph.edges:
|
||||
ds.union(start, dest)
|
||||
# Connected components share the same (arbitrary) root
|
||||
components: dict[Node, set[Node]] = defaultdict(set)
|
||||
for n in graph.nodes:
|
||||
components[ds.find(n)].add(n)
|
||||
# Return a list of disjoint sets corresponding to each connected component
|
||||
return list(components.values())
|
||||
```
|
||||
|
||||
## Implementation
|
||||
|
||||
I will show how to implement `UnionFind` for integers, though it can easily be
|
||||
extended to be used with arbitrary types (e.g: by mapping each element
|
||||
one-to-one to a distinct integer, or using a different set representation).
|
||||
|
||||
### Representation
|
||||
|
||||
Creating a new disjoint set is easy enough:
|
||||
|
||||
```python
|
||||
class UnionFind:
|
||||
_parent: list[int]
|
||||
_rank: list[int]
|
||||
|
||||
def __init__(self, size: int):
|
||||
# Each node is in its own set, making it its own parent...
|
||||
self._parent = list(range(size))
|
||||
# ... And its rank 0
|
||||
self._rank = [0] * size
|
||||
```
|
||||
|
||||
We represent each set through the `_parent` field: each element of the set is
|
||||
linked to its parent, until the root node which is its own parent. When first
|
||||
initializing the structure, each element is in its own set, so we initialize
|
||||
each element to be a root and make it its own parent (`_parent[i] == i` for all
|
||||
`i`).
|
||||
|
||||
The `_rank` field is an optimization which we will touch on in a later section.
|
||||
|
||||
### Find
|
||||
|
||||
A naive implementation of `find(...)` is simple enough to write:
|
||||
|
||||
```python
|
||||
def find(self, elem: int) -> int:
|
||||
# If `elem` is its own parent, then it is the root of the tree
|
||||
if (parent := self._parent[elem]) == elem:
|
||||
return elem
|
||||
# Otherwise, recurse on the parent
|
||||
return self.find(parent)
|
||||
```
|
||||
|
||||
However, going back up the chain of parents each time we want to find the root
|
||||
node (an `O(n)` operation) would make for disastrous performance. Instead we can
|
||||
do a small optimization called _path splitting_.
|
||||
|
||||
```python
|
||||
def find(self, elem: int) -> int:
|
||||
while (parent := self._parent[elem]) != elem:
|
||||
# Replace each parent link by a link to the grand-parent
|
||||
elem, self._parent[elem] = parent, self._parent[parent]
|
||||
return elem
|
||||
```
|
||||
|
||||
This flattens the chain so that each node links more directly to the root (the
|
||||
length is reduced by half), making each subsequent `find(...)` faster.
|
||||
|
||||
Other compression schemes exist, along the spectrum between shortening
|
||||
the chain faster earlier, or updating `_parent` fewer times per `find(...)`.
|
||||
|
||||
### Union
|
||||
|
||||
A naive implementation of `union(...)` is simple enough to write:
|
||||
|
||||
```python
|
||||
def union(self, lhs: int, rhs: int) -> int:
|
||||
# Replace both elements by their root parent
|
||||
lhs = self.find(lhs)
|
||||
rhs = self.find(rhs)
|
||||
# arbitrarily merge one into the other
|
||||
self._parent[rhs] = lhs
|
||||
# Return the new root
|
||||
return lhs
|
||||
```
|
||||
|
||||
Once again, improvements can be made. Depending on the order in which we call
|
||||
`union(...)`, we might end up creating a long chain from the leaf of the tree to
|
||||
the root node, leading to slower `find(...)` operations. If at all possible, we
|
||||
would like to keep the trees as shallow as possible.
|
||||
|
||||
To do so, we want to avoid merging taller trees into smaller ones, so as to keep
|
||||
them as balanced as possible. Since a higher tree will result in a slower
|
||||
`find(...)`, keeping the trees balanced will lead to increased performance.
|
||||
|
||||
This is where the `_rank` field we mentioned earlier comes in: the _rank_ of an
|
||||
element is an upper bound on its height in the tree. By keeping track of this
|
||||
_approximate_ height, we can keep the trees balanced when merging them.
|
||||
|
||||
```python
|
||||
def union(self, lhs: int, rhs: int) -> int:
|
||||
lhs = self.find(lhs)
|
||||
rhs = self.find(rhs)
|
||||
# Bail out early if they already belong to the same set
|
||||
if lhs == rhs:
|
||||
return lhs
|
||||
# Always keep `lhs` as the taller tree
|
||||
if self._rank[lhs] < self._rank[rhs]:
|
||||
lhs, rhs = rhs, lhs
|
||||
# Merge the smaller tree into the taller one
|
||||
self._parent[rhs] = lhs
|
||||
# Update the rank when merging trees of approximately the same size
|
||||
if self._rank[lhs] == self._rank[rhs]:
|
||||
self._rank[lhs] += 1
|
||||
return lhs
|
||||
```
|
||||
|
|
@ -1,171 +0,0 @@
|
|||
---
|
||||
title: "Trie"
|
||||
date: 2024-06-30T11:07:49+01:00
|
||||
draft: false # I don't care for draft mode, git has branches for that
|
||||
description: "A cool map"
|
||||
tags:
|
||||
- algorithms
|
||||
- data structures
|
||||
- python
|
||||
categories:
|
||||
- programming
|
||||
series:
|
||||
- Cool algorithms
|
||||
favorite: false
|
||||
disable_feed: false
|
||||
---
|
||||
|
||||
This time, let's talk about the [_Trie_][wiki], which is a tree-based mapping
|
||||
structure most often used for string keys.
|
||||
|
||||
[wiki]: https://en.wikipedia.org/wiki/Trie
|
||||
|
||||
<!--more-->
|
||||
|
||||
## What does it do?
|
||||
|
||||
A _Trie_ can be used to map a set of string keys to their corresponding values,
|
||||
without the need for a hash function. This also means you won't suffer from hash
|
||||
collisions, though the tree-based structure will probably translate to slower
|
||||
performance than a good hash table.
|
||||
|
||||
A _Trie_ is especially useful to represent a dictionary of words in the case of
|
||||
spell correction, as it can easily be used to fuzzy match words under a given
|
||||
edit distance (think [Levenshtein distance]).
|
||||
|
||||
[Levenshtein distance]: https://en.wikipedia.org/wiki/Levenshtein_distance
|
||||
|
||||
## Implementation
|
||||
|
||||
This implementation will be in Python for exposition purposes, even though
|
||||
it already has a built-in `dict`.
|
||||
|
||||
### Representation
|
||||
|
||||
Creating a new `Trie` is easy: the root node starts off empty and without any
|
||||
mapped values.
|
||||
|
||||
```python
|
||||
class Trie[T]:
|
||||
_children: dict[str, Trie[T]]
|
||||
_value: T | None
|
||||
|
||||
def __init__(self):
|
||||
# Each letter is mapped to a Trie
|
||||
self._children = defaultdict(Trie)
|
||||
# If we match a full string, we store the mapped value
|
||||
self._value = None
|
||||
```
|
||||
|
||||
We're using a `defaultdict` for the children for ease of implementation in this
|
||||
post. In reality, I would encourage you to exit early when you can't match a given
|
||||
character.
|
||||
|
||||
The string key will be implicit by the position of a node in the tree: the empty
|
||||
string at the root, one-character strings as its direct children, etc...
|
||||
|
||||
### Search
|
||||
|
||||
An exact match look-up is easily done: we go down the tree until we've exhausted
|
||||
the key. At that point we've either found a mapped value or not.
|
||||
|
||||
```python
|
||||
def get(self, key: str) -> T | None:
|
||||
# Have we matched the full key?
|
||||
if not key:
|
||||
# Return the `T` if mapped, `None` otherwise
|
||||
return self._value
|
||||
# Otherwise, recurse on the child corresponding to the first letter
|
||||
return self._children[key[0]].get(key[1:])
|
||||
```
|
||||
|
||||
### Insertion
|
||||
|
||||
Adding a new value to the _Trie_ is similar to a key lookup, only this time we
|
||||
store the new value instead of returning it.
|
||||
|
||||
```python
|
||||
def insert(self, key: str, value: T) -> bool:
    """Map `key` to `value`, creating intermediate nodes as needed.

    Returns `True` if `key` was already mapped (its previous value has been
    overwritten), `False` if this is a brand new mapping.
    """
    # Have we matched the full key?
    if not key:
        # Check whether we're overwriting a previous mapping: a node is
        # mapped if and only if it currently stores a value
        was_mapped = self._value is not None
        # Store the corresponding value
        self._value = value
        # Return whether we've performed an overwrite
        return was_mapped
    # Otherwise, recurse on the child corresponding to the first letter
    return self._children[key[0]].insert(key[1:], value)
|
||||
```
|
||||
|
||||
### Removal
|
||||
|
||||
Removal should also look familiar.
|
||||
|
||||
```python
|
||||
def remove(self, key: str) -> bool:
    """Remove the mapping for `key`, if any.

    Returns `True` if `key` was mapped to a value before the call, `False`
    if there was nothing to remove.
    """
    # Have we matched the full key?
    if not key:
        # A node is mapped if and only if it currently stores a value
        was_mapped = self._value is not None
        # Remove the value
        self._value = None
        # Return whether it was mapped
        return was_mapped
    # Otherwise, recurse on the child corresponding to the first letter
    return self._children[key[0]].remove(key[1:])
|
||||
```
|
||||
|
||||
### Fuzzy matching
|
||||
|
||||
Fuzzily matching a given word is where the real difficulty is: the key is to
|
||||
realize we can use the prefix-tree nature of a _Trie_ to avoid doing wasteful
|
||||
work.
|
||||
|
||||
By leveraging the prefix visit order of the tree, we can build an iterative
|
||||
Levenshtein distance matrix, in much the same way one would do so in its
|
||||
[Dynamic Programming] implementation (see the [Wagner-Fischer algorithm]).
|
||||
|
||||
[Dynamic Programming]: https://en.wikipedia.org/wiki/Dynamic_programming
|
||||
[Wagner-Fischer algorithm]: https://en.wikipedia.org/wiki/Wagner%E2%80%93Fischer_algorithm
|
||||
|
||||
```python
|
||||
class FuzzyResult[T](NamedTuple):
|
||||
distance: int
|
||||
key: str
|
||||
value: T
|
||||
|
||||
|
||||
def get_fuzzy(self, key: str, max_distance: int = 0) -> Iterator[FuzzyResult[T]]:
    """Yield every mapping whose key is within `max_distance` edits of `key`.

    The trie is walked in prefix order while one row of the Levenshtein
    distance matrix is computed per visited node. Each result carries the
    edit distance, the matched key, and its mapped value.
    """

    def helper(
        current_word: str,
        node: Trie[T],
        previous_row: list[int],
    ) -> Iterator[FuzzyResult[T]]:
        # Iterative Levenshtein: `previous_row` holds the distances between
        # the parent's word and every prefix of `key`, from which we derive
        # the row for `current_word` (parent's word plus one character)
        current_row = [previous_row[0] + 1]
        current_char = current_word[-1]
        for column, key_char in enumerate(key, start=1):
            insertion = current_row[column - 1] + 1
            deletion = previous_row[column] + 1
            replacement = previous_row[column - 1] + (key_char != current_char)
            current_row.append(min(insertion, deletion, replacement))

        # If we are under the max distance, match this node
        # (only if it has a value of course)
        if (distance := current_row[-1]) <= max_distance and node._value is not None:
            yield FuzzyResult(distance, current_word, node._value)

        # If we can potentially still match children, recurse: once every
        # entry of the row exceeds the maximum, no descendant can match
        if min(current_row) <= max_distance:
            for c, child in node._children.items():
                yield from helper(current_word + c, child, current_row)

    # Build the first row -- the edit distance from the empty string
    row = list(range(len(key) + 1))

    # Base case for the empty string
    if (distance := row[-1]) <= max_distance and self._value is not None:
        yield FuzzyResult(distance, "", self._value)
    for c, child in self._children.items():
        yield from helper(c, child, row)
|
||||
```
|
||||
|
|
@ -1,191 +0,0 @@
|
|||
---
|
||||
title: "Gap Buffer"
|
||||
date: 2024-07-06T21:27:19+01:00
|
||||
draft: false # I don't care for draft mode, git has branches for that
|
||||
description: "As featured in GNU Emacs"
|
||||
tags:
|
||||
- algorithms
|
||||
- data structures
|
||||
- python
|
||||
categories:
|
||||
- programming
|
||||
series:
|
||||
- Cool algorithms
|
||||
favorite: false
|
||||
disable_feed: false
|
||||
---
|
||||
|
||||
The [_Gap Buffer_][wiki] is a popular data structure for text editors to
|
||||
represent files and editable buffers. The most famous of them probably being
|
||||
[GNU Emacs][emacs].
|
||||
|
||||
[wiki]: https://en.wikipedia.org/wiki/Gap_buffer
|
||||
[emacs]: https://www.gnu.org/software/emacs/manual/html_node/elisp/Buffer-Gap.html
|
||||
|
||||
<!--more-->
|
||||
|
||||
## What does it do?
|
||||
|
||||
A _Gap Buffer_ is simply a list of characters, similar to a normal string, with
|
||||
the added twist of splitting it into two sides: the prefix and suffix, on either
|
||||
side of the cursor. In between them, a gap is left to allow for quick
|
||||
insertion at the cursor.
|
||||
|
||||
Moving the cursor moves the gap around the buffer, the prefix and suffix getting
|
||||
shorter/longer as required.
|
||||
|
||||
## Implementation
|
||||
|
||||
I'll be writing a sample implementation in Python, as with the rest of the
|
||||
[series]({{< ref "/series/cool-algorithms/" >}}). I don't think it showcases the
|
||||
elegance of the _Gap Buffer_ in action like a C implementation full of
|
||||
`memmove`s would, but it does make it short and sweet.
|
||||
|
||||
### Representation
|
||||
|
||||
We'll be representing the gap buffer as an actual list of characters.
|
||||
|
||||
Given that Python doesn't _have_ characters, let's settle for a list of strings,
|
||||
each representing a single character...
|
||||
|
||||
```python
|
||||
Char = str
|
||||
|
||||
class GapBuffer:
|
||||
# List of characters, contains prefix and suffix of string with gap in the middle
|
||||
_buf: list[Char]
|
||||
# The gap is contained between [start, end) (i.e: buf[start:end])
|
||||
_gap_start: int
|
||||
_gap_end: int
|
||||
|
||||
# Visual representation of the gap buffer:
|
||||
# This is a very [ ]long string.
|
||||
# |<----------------------------------------------->| capacity
|
||||
# |<------------>| |<-------->| string
|
||||
# |<------------------->| gap
|
||||
# |<------------>| prefix
|
||||
# |<-------->| suffix
|
||||
def __init__(self, initial_capacity: int = 16) -> None:
|
||||
assert initial_capacity > 0
|
||||
# Initialize an empty gap buffer
|
||||
self._buf = [""] * initial_capacity
|
||||
self._gap_start = 0
|
||||
self._gap_end = initial_capacity
|
||||
```
|
||||
|
||||
### Accessors
|
||||
|
||||
I'm mostly adding these for exposition, and making it easier to write `assert`s
|
||||
later.
|
||||
|
||||
```python
|
||||
@property
|
||||
def capacity(self) -> int:
|
||||
return len(self._buf)
|
||||
|
||||
@property
|
||||
def gap_length(self) -> int:
|
||||
return self._gap_end - self._gap_start
|
||||
|
||||
@property
|
||||
def string_length(self) -> int:
|
||||
return self.capacity - self.gap_length
|
||||
|
||||
@property
|
||||
def prefix_length(self) -> int:
|
||||
return self._gap_start
|
||||
|
||||
@property
|
||||
def suffix_length(self) -> int:
|
||||
return self.capacity - self._gap_end
|
||||
```
|
||||
|
||||
### Growing the buffer
|
||||
|
||||
I've written this method in a somewhat non-idiomatic manner, to make it closer
|
||||
to how it would look in C using `realloc` instead.
|
||||
|
||||
It would be more efficient to use slicing to insert the needed extra capacity
|
||||
directly, instead of making a new buffer and copying characters over.
|
||||
|
||||
```python
|
||||
def grow(self, capacity: int) -> None:
|
||||
assert capacity >= self.capacity
|
||||
# Create a new buffer with the new capacity
|
||||
new_buf = [""] * capacity
|
||||
# Move the prefix/suffix to their place in the new buffer
|
||||
added_capacity = capacity - len(self._buf)
|
||||
new_buf[: self._gap_start] = self._buf[: self._gap_start]
|
||||
new_buf[self._gap_end + added_capacity :] = self._buf[self._gap_end :]
|
||||
# Use the new buffer, account for added capacity
|
||||
self._buf = new_buf
|
||||
self._gap_end += added_capacity
|
||||
```
|
||||
|
||||
### Insertion
|
||||
|
||||
Inserting text at the cursor's position means filling up the gap in the middle
|
||||
of the buffer. To do so we must first make sure that the gap is big enough, or
|
||||
grow the buffer accordingly.
|
||||
|
||||
Then inserting the text is simply a matter of copying its characters in place,
|
||||
and moving the start of the gap further right.
|
||||
|
||||
```python
|
||||
def insert(self, val: str) -> None:
|
||||
# Ensure we have enough space to insert the whole string
|
||||
if len(val) > self.gap_length:
|
||||
self.grow(max(self.capacity * 2, self.string_length + len(val)))
|
||||
# Fill the gap with the given string
|
||||
self._buf[self._gap_start : self._gap_start + len(val)] = val
|
||||
self._gap_start += len(val)
|
||||
```
|
||||
|
||||
### Deletion
|
||||
|
||||
Removing text from the buffer simply expands the gap in the corresponding
|
||||
direction, shortening the string's prefix/suffix. This makes it very cheap.
|
||||
|
||||
The methods are named after the `backspace` and `delete` keys on the keyboard.
|
||||
|
||||
```python
|
||||
def backspace(self, dist: int = 1) -> None:
|
||||
assert dist <= self.prefix_length
|
||||
# Extend gap to the left
|
||||
self._gap_start -= dist
|
||||
|
||||
def delete(self, dist: int = 1) -> None:
|
||||
assert dist <= self.suffix_length
|
||||
# Extend gap to the right
|
||||
self._gap_end += dist
|
||||
```
|
||||
|
||||
### Moving the cursor
|
||||
|
||||
Moving the cursor along the buffer will shift letters from one side of the gap
|
||||
to the other, moving them across from prefix to suffix and back.
|
||||
|
||||
I find Python's list slicing not quite as elegant to read as a `memmove`, though
|
||||
it does make for a very small and efficient implementation.
|
||||
|
||||
```python
|
||||
def left(self, dist: int = 1) -> None:
|
||||
assert dist <= self.prefix_length
|
||||
# Shift the needed number of characters from end of prefix to start of suffix
|
||||
self._buf[self._gap_end - dist : self._gap_end] = self._buf[
|
||||
self._gap_start - dist : self._gap_start
|
||||
]
|
||||
# Adjust indices accordingly
|
||||
self._gap_start -= dist
|
||||
self._gap_end -= dist
|
||||
|
||||
def right(self, dist: int = 1) -> None:
|
||||
assert dist <= self.suffix_length
|
||||
# Shift the needed number of characters from start of suffix to end of prefix
|
||||
self._buf[self._gap_start : self._gap_start + dist] = self._buf[
|
||||
self._gap_end : self._gap_end + dist
|
||||
]
|
||||
# Adjust indices accordingly
|
||||
self._gap_start += dist
|
||||
self._gap_end += dist
|
||||
```
|
||||
|
|
@ -1,97 +0,0 @@
|
|||
---
|
||||
title: "Bloom Filter"
|
||||
date: 2024-07-14T17:46:40+01:00
|
||||
draft: false # I don't care for draft mode, git has branches for that
|
||||
description: "Probably cool"
|
||||
tags:
|
||||
- algorithms
|
||||
- data structures
|
||||
- python
|
||||
categories:
|
||||
- programming
|
||||
series:
|
||||
- Cool algorithms
|
||||
favorite: false
|
||||
disable_feed: false
|
||||
---
|
||||
|
||||
The [_Bloom Filter_][wiki] is a probabilistic data structure for set membership.
|
||||
|
||||
The filter can be used as an inexpensive first step when querying the actual
|
||||
data is quite costly (e.g: as a first check for expensive cache lookups or large
|
||||
data seeks).
|
||||
|
||||
[wiki]: https://en.wikipedia.org/wiki/Bloom_filter
|
||||
|
||||
<!--more-->
|
||||
|
||||
## What does it do?
|
||||
|
||||
A _Bloom Filter_ can be understood as a hash-set which can either tell you:
|
||||
|
||||
* An element is _not_ part of the set.
|
||||
* An element _may be_ part of the set.
|
||||
|
||||
More specifically, one can tweak the parameters of the filter to make it so that
|
||||
the _false positive_ rate of membership is quite low.
|
||||
|
||||
I won't be going into those calculations here, but they are quite trivial to
|
||||
compute, or one can just look up appropriate values for their use case.
|
||||
|
||||
## Implementation
|
||||
|
||||
I'll be using Python, which has the nifty ability of representing bitsets
|
||||
through its built-in big integers quite easily.
|
||||
|
||||
We'll be assuming a `BIT_COUNT` of 64 here, but the implementation can easily be
|
||||
tweaked to use a different number, or even change it at construction time.
|
||||
|
||||
### Representation
|
||||
|
||||
A `BloomFilter` is just a set of bits and a list of hash functions.
|
||||
|
||||
```python
|
||||
BIT_COUNT = 64
|
||||
|
||||
class BloomFilter[T]:
|
||||
_bits: int
|
||||
_hash_functions: list[Callable[[T], int]]
|
||||
|
||||
def __init__(self, hash_functions: list[Callable[[T], int]]) -> None:
|
||||
# Filter is initially empty
|
||||
self._bits = 0
|
||||
self._hash_functions = hash_functions
|
||||
```
|
||||
|
||||
### Inserting a key
|
||||
|
||||
To add an element to the filter, we take the output from each hash function and
|
||||
use that to set a bit in the filter. This combination of bits will identify the
|
||||
element, which we can use for lookup later.
|
||||
|
||||
```python
|
||||
def insert(self, val: T) -> None:
    """Add `val` to the filter by setting one bit per hash function."""
    # Iterate over each hash
    for f in self._hash_functions:
        # Reduce the hash to a valid bit index
        n = f(val) % BIT_COUNT
        # Set the corresponding bit
        self._bits |= 1 << n
|
||||
```
|
||||
|
||||
### Querying a key
|
||||
|
||||
Because the _Bloom Filter_ does not actually store its elements, but some
|
||||
derived data from hashing them, it can only definitely say if an element _does
|
||||
not_ belong to it. Otherwise, it _may_ be part of the set, and should be checked
|
||||
against the actual underlying store.
|
||||
|
||||
```python
|
||||
def may_contain(self, val: T) -> bool:
    """Return `False` if `val` is definitely absent, `True` if it may be present."""
    for f in self._hash_functions:
        # Same bit index computation as when inserting
        n = f(val) % BIT_COUNT
        # If one of the bits is unset, the value is definitely not present
        if not (self._bits & (1 << n)):
            return False
    # All bits were matched, `val` is likely to be part of the set
    return True
|
||||
```
|
||||
|
|
@ -1,159 +0,0 @@
|
|||
---
|
||||
title: "Treap"
|
||||
date: 2024-07-20T14:12:27+01:00
|
||||
draft: false # I don't care for draft mode, git has branches for that
|
||||
description: "A simpler BST"
|
||||
tags:
|
||||
- algorithms
|
||||
- data structures
|
||||
- python
|
||||
categories:
|
||||
- programming
|
||||
series:
|
||||
- Cool algorithms
|
||||
favorite: false
|
||||
disable_feed: false
|
||||
graphviz: true
|
||||
---
|
||||
|
||||
The [_Treap_][wiki] is a mix between a _Binary Search Tree_ and a _Heap_.
|
||||
|
||||
Like a _Binary Search Tree_, it keeps an ordered set of keys in the shape of a
|
||||
tree, allowing for binary search traversal.
|
||||
|
||||
Like a _Heap_, it associates each node with a priority, making sure that a
|
||||
parent's priority is always higher than any of its children.
|
||||
|
||||
[wiki]: https://en.wikipedia.org/wiki/Treap
|
||||
|
||||
<!--more-->
|
||||
|
||||
## What does it do?
|
||||
|
||||
By randomizing the priority value of each key at insertion time, we ensure a
|
||||
high likelihood that the tree stays _roughly_ balanced, avoiding degenerating to
|
||||
unbalanced O(N) height.
|
||||
|
||||
Here's a sample tree created by inserting integers from 0 to 250 into the tree:
|
||||
|
||||
{{< graphviz file="treap.gv" />}}
|
||||
|
||||
## Implementation
|
||||
|
||||
I'll be keeping the theme for this [series] by using Python to implement the
|
||||
_Treap_. This leads to somewhat annoying code to handle the rotation process,
|
||||
which is easier to do in C using pointers.
|
||||
|
||||
[series]: {{< ref "/series/cool-algorithms/" >}}
|
||||
|
||||
### Representation
|
||||
|
||||
Creating a new `Treap` is easy: the tree starts off empty, waiting for new nodes
|
||||
to insert.
|
||||
|
||||
Each `Node` must keep track of the `key`, the mapped `value`, and the node's
|
||||
`priority` (which is assigned randomly). Finally it must also allow for storing
|
||||
two children (`left` and `right`).
|
||||
|
||||
```python
|
||||
class Node[K, V]:
|
||||
key: K
|
||||
value: V
|
||||
priority: float
|
||||
left: Node[K, V] | None
|
||||
righg: Node[K, V] | None
|
||||
|
||||
def __init__(self, key: K, value: V):
|
||||
# Store key and value, like a normal BST node
|
||||
self.key = key
|
||||
self.value = value
|
||||
# Priority is derived randomly
|
||||
self.priority = random()
|
||||
self.left = None
|
||||
self.right = None
|
||||
|
||||
class Treap[K, V]:
|
||||
_root: Node[K, V] | None
|
||||
|
||||
def __init__(self):
|
||||
# The tree starts out empty
|
||||
self._root = None
|
||||
```
|
||||
|
||||
### Search
|
||||
|
||||
Searching the tree is the same as in any other _Binary Search Tree_.
|
||||
|
||||
```python
|
||||
def get(self, key: K) -> V | None:
    """Return the value mapped to `key`, or `None` if `key` is not in the tree."""
    node = self._root
    # The usual BST traversal
    while node is not None:
        if node.key == key:
            return node.value
        elif node.key < key:
            # Searched key is larger: it can only be in the right sub-tree
            node = node.right
        else:
            node = node.left
    # Fell off the tree without finding the key
    return None
|
||||
```
|
||||
|
||||
### Insertion
|
||||
|
||||
To insert a new `key` into the tree, we identify which leaf position it should
|
||||
be inserted at. We then generate the node's priority, insert it at this
|
||||
position, and rotate the node upwards until the heap property is respected.
|
||||
|
||||
```python
|
||||
type ChildField = Literal["left, right"]
|
||||
|
||||
def insert(self, key: K, value: V) -> bool:
|
||||
# Empty treap base-case
|
||||
if self._root is None:
|
||||
self._root = Node(key, value)
|
||||
# Signal that we're not overwriting the value
|
||||
return False
|
||||
# Keep track of the parent chain for rotation after insertion
|
||||
parents = []
|
||||
node = self._root
|
||||
while node is not None:
|
||||
# Insert a pre-existing key
|
||||
if node.key == key:
|
||||
node.value = value
|
||||
return True
|
||||
# Go down the tree, keep track of the path through the tree
|
||||
field = "left" if key < node.key else "right"
|
||||
parents.append((node, field))
|
||||
node = getattr(node, field)
|
||||
# Key wasn't found, we're inserting a new node
|
||||
child = Node(key, value)
|
||||
parent, field = parents[-1]
|
||||
setattr(parent, field, child)
|
||||
# Rotate the new node up until we respect the decreasing priority property
|
||||
self._rotate_up(child, parents)
|
||||
# Key wasn't found, signal that we inserted a new node
|
||||
return False
|
||||
|
||||
def _rotate_up(
|
||||
self,
|
||||
node: Node[K, V],
|
||||
parents: list[tuple[Node[K, V], ChildField]],
|
||||
) -> None:
|
||||
while parents:
|
||||
parent, field = parents.pop()
|
||||
# If the parent has higher priority, we're done rotating
|
||||
if parent.priority >= node.priority:
|
||||
break
|
||||
# Check for grand-parent/root of tree edge-case
|
||||
if parents:
|
||||
# Update grand-parent to point to the new rotated node
|
||||
grand_parent, field = parents[-1]
|
||||
setattr(grand_parent, field, node)
|
||||
else:
|
||||
# Point the root to the new rotated node
|
||||
self._root = node
|
||||
other_field = "left" if field == "right" else "right"
|
||||
# Rotate the node up
|
||||
setattr(parent, field, getattr(node, other_field))
|
||||
setattr(node, other_field, parent)
|
||||
```
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,146 +0,0 @@
|
|||
---
|
||||
title: "Treap, revisited"
|
||||
date: 2024-07-27T14:12:27+01:00
|
||||
draft: false # I don't care for draft mode, git has branches for that
|
||||
description: "An even simpler BST"
|
||||
tags:
|
||||
- algorithms
|
||||
- data structures
|
||||
- python
|
||||
categories:
|
||||
- programming
|
||||
series:
|
||||
- Cool algorithms
|
||||
favorite: false
|
||||
disable_feed: false
|
||||
---
|
||||
|
||||
My [last post]({{< relref "../2024-07-20-treap/index.md" >}}) about the _Treap_
|
||||
showed an implementation using tree rotations, as is commonly done with [AVL
|
||||
Trees][avl] and [Red Black Trees][rb].
|
||||
|
||||
But the _Treap_ lends itself well to a simple and elegant implementation with no
|
||||
tree rotations. This makes it especially easy to implement the removal of a key,
|
||||
rather than the fiddly process of deletion using tree rotations.
|
||||
|
||||
[avl]: https://en.wikipedia.org/wiki/AVL_tree
|
||||
[rb]: https://en.wikipedia.org/wiki/Red%E2%80%93black_tree
|
||||
|
||||
<!--more-->
|
||||
|
||||
## Implementation
|
||||
|
||||
All operations on the tree will be implemented in terms of two fundamental
|
||||
operations: `split` and `merge`.
|
||||
|
||||
We'll be reusing the same structures as in the last post, so let's skip straight
|
||||
to implementing those fundamentals, and building on them for `insert` and
|
||||
`delete`.
|
||||
|
||||
### Split
|
||||
|
||||
Splitting a tree means taking a key, and getting the following output:
|
||||
|
||||
* a `left` node, root of the tree of all keys lower than the input.
|
||||
* an extracted `node` which corresponds to the input `key`.
|
||||
* a `right` node, root of the tree of all keys higher than the input.
|
||||
|
||||
```python
|
||||
type OptionalNode[K, V] = Node[K, V] | None
|
||||
|
||||
class SplitResult(NamedTuple):
|
||||
left: OptionalNode
|
||||
node: OptionalNode
|
||||
right: OptionalNode
|
||||
|
||||
def split(root: OptionalNode[K, V], key: K) -> SplitResult:
|
||||
# Base case, empty tree
|
||||
if root is None:
|
||||
return SplitResult(None, None, None)
|
||||
# If we found the key, simply extract left and right
|
||||
if root.key == key:
|
||||
left, right = root.left, root.right
|
||||
root.left, root.right = None, None
|
||||
return SplitResult(left, root, right)
|
||||
# Otherwise, recurse on the corresponding side of the tree
|
||||
if root.key < key:
|
||||
left, node, right = split(root.right, key)
|
||||
root.right = left
|
||||
return SplitResult(root, node, right)
|
||||
if key < root.key:
|
||||
left, node, right = split(root.left, key)
|
||||
root.left = right
|
||||
return SplitResult(left, node, root)
|
||||
raise RuntimeError("Unreachable")
|
||||
```
|
||||
|
||||
### Merge
|
||||
|
||||
Merging a `left` and `right` tree means (cheaply) building a new tree containing
|
||||
both of them. A pre-condition for merging is that the `left` tree is composed
|
||||
entirely of nodes that are lower than any key in `right` (i.e: as in `left` and
|
||||
`right` after a `split`).
|
||||
|
||||
```python
|
||||
def merge(
|
||||
left: OptionalNode[K, V],
|
||||
right: OptionalNode[K, V],
|
||||
) -> OptionalNode[K, V]:
|
||||
# Base cases, left or right being empty
|
||||
if left is None:
|
||||
return right
|
||||
if right is None:
|
||||
return left
|
||||
# Left has higher priority, it must become the root node
|
||||
if left.priority >= right.priority:
|
||||
# We recursively reconstruct its right sub-tree
|
||||
left.right = merge(left.right, right)
|
||||
return left
|
||||
# Right has higher priority, it must become the root node
|
||||
if left.priority < right.priority:
|
||||
# We recursively reconstruct its left sub-tree
|
||||
right.left = merge(left, right.left)
|
||||
return right
|
||||
raise RuntimeError("Unreachable")
|
||||
```
|
||||
|
||||
### Insertion
|
||||
|
||||
Inserting a node into the tree is done in two steps:
|
||||
|
||||
1. `split` the tree to isolate the middle insertion point
|
||||
2. `merge` it back up to form a full tree with the inserted key
|
||||
|
||||
```python
|
||||
def insert(self, key: K, value: V) -> bool:
|
||||
# `left` and `right` come before/after the key
|
||||
left, node, right = split(self._root, key)
|
||||
was_updated: bool
|
||||
# Create the node, or update its value, if the key was already in the tree
|
||||
if node is None:
|
||||
node = Node(key, value)
|
||||
was_updated = False
|
||||
else:
|
||||
node.value = value
|
||||
was_updated = True
|
||||
# Rebuild the tree with a couple of merge operations
|
||||
self._root = merge(left, merge(node, right))
|
||||
# Signal whether the key was already in the tree
|
||||
return was_updated
|
||||
```
|
||||
|
||||
### Removal
|
||||
|
||||
Removing a key from the tree is similar to inserting a new key, and forgetting
|
||||
to insert it back: simply `split` the tree and `merge` it back without the
|
||||
extracted middle node.
|
||||
|
||||
```python
|
||||
def remove(self, key: K) -> bool:
|
||||
# `node` contains the key, or `None` if the key wasn't in the tree
|
||||
left, node, right = split(self._root, key)
|
||||
# Put the tree back together, without the extracted node
|
||||
self._root = merge(left, right)
|
||||
# Signal whether `key` was mapped in the tree
|
||||
return node is not None
|
||||
```
|
||||
|
|
@ -1,145 +0,0 @@
|
|||
---
|
||||
title: "Reservoir Sampling"
|
||||
date: 2024-08-02T18:30:56+01:00
|
||||
draft: false # I don't care for draft mode, git has branches for that
|
||||
description: "Elegantly sampling a stream"
|
||||
tags:
|
||||
- algorithms
|
||||
- python
|
||||
categories:
|
||||
- programming
|
||||
series:
|
||||
- Cool algorithms
|
||||
favorite: false
|
||||
disable_feed: false
|
||||
mathjax: true
|
||||
---
|
||||
|
||||
[_Reservoir Sampling_][reservoir] is an [online][online], probabilistic
|
||||
algorithm to uniformly sample $k$ random elements out of a stream of values.
|
||||
|
||||
It's a particularly elegant and small algorithm, only requiring $\Theta(k)$
|
||||
amount of space and a single pass through the stream.
|
||||
|
||||
[reservoir]: https://en.wikipedia.org/wiki/Reservoir_sampling
|
||||
[online]: https://en.wikipedia.org/wiki/Online_algorithm
|
||||
|
||||
<!--more-->
|
||||
|
||||
## Sampling one element
|
||||
|
||||
As an introduction, we'll first focus on fairly sampling one element from the
|
||||
stream.
|
||||
|
||||
```python
|
||||
def sample_one[T](stream: Iterable[T]) -> T:
|
||||
stream_iter = iter(stream)
|
||||
# Sample the first element
|
||||
res = next(stream_iter)
|
||||
for i, val in enumerate(stream_iter, start=1):
|
||||
j = random.randint(0, i)
|
||||
# Replace the sampled element with probability 1/(i + 1)
|
||||
if j == 0:
|
||||
res = val
|
||||
# Return the randomly sampled element
|
||||
return res
|
||||
```
|
||||
|
||||
### Proof
|
||||
|
||||
Let's now prove that this algorithm leads to a fair sampling of the stream.
|
||||
|
||||
We'll be doing proof by induction.
|
||||
|
||||
#### Hypothesis $H_N$
|
||||
|
||||
After iterating through the first $N$ items in the stream,
|
||||
each of them has had an equal $\frac{1}{N}$ probability of being selected as
|
||||
`res`.
|
||||
|
||||
#### Base Case $H_1$
|
||||
|
||||
We can trivially observe that the first element is always assigned to `res`,
|
||||
$\frac{1}{1} = 1$, the hypothesis has been verified.
|
||||
|
||||
#### Inductive Case
|
||||
|
||||
For a given $N$, let us assume that $H_N$ holds. Let us now look at the events
|
||||
of loop iteration where `i = N` (i.e: observation of the $N + 1$-th item in the
|
||||
stream).
|
||||
|
||||
`j = random.randint(0, i)` uniformly selects a value in the range $[0, i]$,
|
||||
a.k.a $[0, N]$. We then have two cases:
|
||||
|
||||
* `j == 0`, with probability $\frac{1}{N + 1}$: we select `val` as the new
|
||||
reservoir element `res`.
|
||||
|
||||
* `j != 0`, with probability $\frac{N}{N + 1}$: we keep the previous value of
|
||||
`res`. By $H_N$, any of the first $N$ elements had a $\frac{1}{N}$ probability
|
||||
of being `res` at the start of the loop, so each element now has a
|
||||
probability $\frac{1}{N} \cdot \frac{N}{N + 1} = \frac{1}{N + 1}$ of being the
|
||||
element.
|
||||
|
||||
And thus, we have proven $H_{N + 1}$ at the end of the loop.
|
||||
|
||||
## Sampling $k$ elements
|
||||
|
||||
The code for sampling $k$ elements is very similar to the one-element case.
|
||||
|
||||
```python
|
||||
def sample[T](stream: Iterable[T], k: int = 1) -> list[T]:
|
||||
stream_iter = iter(stream)
|
||||
# Retain the first 'k' elements in the reservoir
|
||||
res = list(itertools.islice(stream_iter, k))
|
||||
for i, val in enumerate(stream_iter, start=k):
|
||||
j = random.randint(0, i)
|
||||
# Replace one element at random with probability k/(i + 1)
|
||||
if j < k:
|
||||
res[j] = val
|
||||
# Return 'k' randomly sampled elements
|
||||
return res
|
||||
```
|
||||
|
||||
### Proof
|
||||
|
||||
Let us once again do a proof by induction, assuming the stream contains at least
|
||||
$k$ items.
|
||||
|
||||
#### Hypothesis $H_N$
|
||||
|
||||
After iterating through the first $N$ items in the stream, each of them has had
|
||||
an equal $\frac{k}{N}$ probability of being sampled from the stream.
|
||||
|
||||
#### Base Case $H_k$
|
||||
|
||||
We can trivially observe that the first $k$ elements are sampled at the start of
|
||||
the algorithm, $\frac{k}{k} = 1$, the hypothesis has been verified.
|
||||
|
||||
#### Inductive Case
|
||||
|
||||
For a given $N$, let us assume that $H_N$ holds. Let us now look at the events
|
||||
of the loop iteration where `i = N`, in order to prove $H_{N + 1}$.
|
||||
|
||||
`j = random.randint(0, i)` uniformly selects a value in the range $[0, i]$,
|
||||
a.k.a $[0, N]$. We then have three cases:
|
||||
|
||||
* `j >= k`, with probability $1 - \frac{k}{N + 1}$: we do not modify the
|
||||
sampled reservoir at all.
|
||||
|
||||
* `j < k`, with probability $\frac{k}{N + 1}$: we sample the new element to
|
||||
replace the `j`-th element of the reservoir. Therefore for any element
|
||||
$e \in [0, k[$ we can either have:
|
||||
* $j = e$: the element _is_ replaced, probability $\frac{1}{k}$.
|
||||
* $j \neq e$: the element is _not_ replaced, probability $\frac{k - 1}{k}$.
|
||||
|
||||
We can now compute the probability that a previously sampled element is kept in
|
||||
the reservoir:
|
||||
$1 - \frac{k}{N + 1} + \frac{k}{N + 1} \cdot \frac{k - 1}{k} = \frac{N}{N + 1}$.
|
||||
|
||||
By $H_N$, any of the first $N$ elements had a $\frac{k}{N}$ probability
|
||||
of being sampled at the start of the loop, so each element now has a
|
||||
probability $\frac{k}{N} \cdot \frac{N}{N + 1} = \frac{k}{N + 1}$ of being
|
||||
sampled.
|
||||
|
||||
We have now proven that all elements have a probability $\frac{k}{N + 1}$ of
|
||||
being sampled at the end of the loop, therefore $H_{N + 1}$ has been verified.
|
||||
|
|
@ -1,472 +0,0 @@
|
|||
---
|
||||
title: "k-d Tree"
|
||||
date: 2024-08-10T11:50:33+01:00
|
||||
draft: false # I don't care for draft mode, git has branches for that
|
||||
description: "Points in spaaaaace!"
|
||||
tags:
|
||||
- algorithms
|
||||
- data structures
|
||||
- python
|
||||
categories:
|
||||
- programming
|
||||
series:
|
||||
- Cool algorithms
|
||||
favorite: false
|
||||
disable_feed: false
|
||||
---
|
||||
|
||||
The [_k-d Tree_][wiki] is a useful way to map points in space and make them
|
||||
efficient to query.
|
||||
|
||||
I ran into them during my studies in graphics, as they are one of the
|
||||
possible acceleration structures for [ray-casting] operations.
|
||||
|
||||
[wiki]: https://en.wikipedia.org/wiki/K-d_tree
|
||||
[ray-casting]: https://en.wikipedia.org/wiki/Ray_casting
|
||||
|
||||
<!--more-->
|
||||
|
||||
## Implementation
|
||||
|
||||
As usual, this will be in Python, though its lack of proper discriminated enums
|
||||
makes it more verbose than would otherwise be necessary.
|
||||
|
||||
### Pre-requisites
|
||||
|
||||
Let's first define what kind of space our _k-d Tree_ is dealing with. In this
|
||||
instance $k = 3$ just like in the normal world.
|
||||
|
||||
```python
|
||||
class Point(NamedTuple):
|
||||
x: float
|
||||
y: float
|
||||
z: float
|
||||
|
||||
class Axis(IntEnum):
|
||||
X = 0
|
||||
Y = 1
|
||||
Z = 2
|
||||
|
||||
def next(self) -> Axis:
|
||||
# Each level of the tree is split along a different axis
|
||||
return Axis((self + 1) % 3)
|
||||
```
|
||||
|
||||
### Representation
|
||||
|
||||
The tree is represented by `KdTree`, each of its leaf nodes is a `KdLeafNode`
|
||||
and its inner nodes are `KdSplitNode`s.
|
||||
|
||||
For each point in space, the tree can also keep track of an associated value,
|
||||
similar to a dictionary or other mapping data structure. Hence we will make our
|
||||
`KdTree` generic to this mapped type `T`.
|
||||
|
||||
#### Leaf node
|
||||
|
||||
A leaf node contains a number of points that were added to the tree. For each
|
||||
point, we also track their mapped value, hence the `dict[Point, T]`.
|
||||
|
||||
```python
|
||||
class KdLeafNode[T]:
|
||||
points: dict[Point, T]
|
||||
|
||||
def __init__(self):
|
||||
self.points = {}
|
||||
```
|
||||
|
||||
#### Split node
|
||||
|
||||
An inner node must partition the space into two sub-spaces along a given axis
|
||||
and mid-point (thus defining a plane). All points that are "to the left" of the
|
||||
plane will be kept in one child, while all the points "to the right" will be in
|
||||
the other. Similar to a [_Binary Search Tree_][bst]'s inner nodes.
|
||||
|
||||
[bst]: https://en.wikipedia.org/wiki/Binary_search_tree
|
||||
|
||||
```python
|
||||
class KdSplitNode[T]:
|
||||
axis: Axis
|
||||
mid: float
|
||||
children: tuple[KdTreeNode[T], KdTreeNode[T]]
|
||||
|
||||
# Convenience function to index into the child which contains `point`
|
||||
def _index(self, point: Point) -> int:
|
||||
return 0 if point[self.axis] <= self.mid else 1
|
||||
```
|
||||
|
||||
#### Tree
|
||||
|
||||
The tree itself is merely a wrapper around its inner nodes.
|
||||
|
||||
One annoying issue about writing this in Python is the lack of proper
|
||||
discriminated enum types. So we need to create a wrapper type for the nodes
|
||||
(`KdNode`) to allow for splitting when updating the tree.
|
||||
|
||||
```python
|
||||
class KdNode[T]:
|
||||
# Wrapper around leaf/inner nodes, the poor man's discriminated enum
|
||||
inner: KdLeafNode[T] | KdSplitNode[T]
|
||||
|
||||
def __init__(self):
|
||||
self.inner = KdLeafNode()
|
||||
|
||||
# Convenience constructor used when splitting a node
|
||||
@classmethod
|
||||
def from_items(cls, items: Iterable[tuple[Point, T]]) -> KdNode[T]:
|
||||
res = cls()
|
||||
res.inner.points.update(items)
|
||||
return res
|
||||
|
||||
class KdTree[T]:
|
||||
_root: KdNode[T]
|
||||
|
||||
def __init__(self):
|
||||
# Tree starts out empty
|
||||
self._root = KdNode()
|
||||
```
|
||||
|
||||
### Inserting a point
|
||||
|
||||
To add a point to the tree, we simply recurse from node to node, similar to a
|
||||
_BST_'s insertion algorithm. Once we've found the correct leaf node to insert
|
||||
our point into, we simply do so.
|
||||
|
||||
If that leaf node goes over the maximum number of points it can store, we must
|
||||
then split it along an axis, cycling between `X`, `Y`, and `Z` at each level of
|
||||
the tree (i.e: splitting along the `X` axis on the first level, then `Y` on the
|
||||
second, then `Z` after that, and then `X`, etc...).
|
||||
|
||||
```python
|
||||
# How many points should be stored in a leaf node before being split
|
||||
MAX_CAPACITY = 32
|
||||
|
||||
# Median of `values`: the middle element for an odd count, the mean of the two
# middle elements for an even count. Raises `IndexError` on empty input.
def median(values: Iterable[float]) -> float:
    sorted_values = sorted(values)
    mid_point = len(sorted_values) // 2
    if len(sorted_values) % 2 == 1:
        return sorted_values[mid_point]
    # Even count: the two middle elements sit at `mid_point - 1` and `mid_point`
    a, b = sorted_values[mid_point - 1], sorted_values[mid_point]
    # Written as `a + (b - a) / 2` rather than `(a + b) / 2` to avoid overflow
    return a + (b - a) / 2
|
||||
|
||||
def partition[T](
|
||||
pred: Callable[[T], bool],
|
||||
iterable: Iterable[T]
|
||||
) -> tuple[list[T], list[T]]:
|
||||
truths, falses = [], []
|
||||
for v in iterable:
|
||||
(truths if pred(v) else falses).append(v)
|
||||
return truths, falses
|
||||
|
||||
def split_leaf[T](node: KdLeafNode[T], axis: Axis) -> KdSplitNode[T]:
|
||||
# Find the median value for the given axis
|
||||
mid = median(p[axis] for p in node.points)
|
||||
# Split into left/right children according to the mid-point and axis
|
||||
left, right = partition(lambda kv: kv[0][axis] <= mid, node.points.items())
|
||||
return KdSplitNode(
|
||||
split_axis,
|
||||
mid,
|
||||
(KdNode.from_items(left), KdNode.from_items(right)),
|
||||
)
|
||||
|
||||
class KdTree[T]:
|
||||
def insert(self, point: Point, val: T) -> bool:
|
||||
# Forward to the root node, choose `X` as the first split axis
|
||||
return self._root.insert(point, val, Axis.X)
|
||||
|
||||
class KdLeafNode[T]:
|
||||
def insert(self, point: Point, val: T, split_axis: Axis) -> bool:
|
||||
# Check whether we're overwriting a previous value
|
||||
was_mapped = point in self.points
|
||||
# Store the corresponding value
|
||||
self.points[point] = val
|
||||
# Return whether we've performed an overwrite
|
||||
return was_mapped
|
||||
|
||||
class KdSplitNode[T]:
|
||||
def insert(self, point: Point, val: T, split_axis: Axis) -> bool:
|
||||
# Find the child which contains the point
|
||||
child = self.children[self._index(point)]
|
||||
# Recurse into it, choosing the next split axis
|
||||
return child.insert(point, val, split_axis.next())
|
||||
|
||||
class KdNode[T]:
|
||||
def insert(self, point: Point, val: T, split_axis: Axis) -> bool:
|
||||
# Add the point to the wrapped node...
|
||||
res = self.inner.insert(point, val, split_axis)
|
||||
# ... And take care of splitting leaf nodes when necessary
|
||||
if (
|
||||
isinstance(self.inner, KdLeafNode)
|
||||
and len(self.inner.points) > MAX_CAPACITY
|
||||
):
|
||||
self.inner = split_leaf(self.inner, split_axis)
|
||||
return res
|
||||
```
|
||||
|
||||
### Searching for a point
|
||||
|
||||
Looking for a given point in the tree looks very similar to a _BST_'s search,
|
||||
each inner node dividing the space into two sub-spaces, only one of which
|
||||
contains the point.
|
||||
|
||||
```python
|
||||
class KdTree[T]:
|
||||
def lookup(self, point: Point) -> T | None:
|
||||
# Forward to the root node
|
||||
return self._root.lookup(point)
|
||||
|
||||
class KdNode[T]:
|
||||
def lookup(self, point: Point) -> T | None:
|
||||
# Forward to the wrapped node
|
||||
return self.inner.lookup(point)
|
||||
|
||||
class KdLeafNode[T]:
|
||||
def lookup(self, point: Point) -> T | None:
|
||||
# Simply check whether we've stored the point in this leaf
|
||||
return self.points.get(point)
|
||||
|
||||
class KdSplitNode[T]:
|
||||
def lookup(self, point: Point) -> T | None:
|
||||
# Recurse into the child which contains the point
|
||||
return self.children[self._index(point)].lookup(point)
|
||||
```
|
||||
|
||||
### Closest points
|
||||
|
||||
Now to look at the most interesting operation one can do on a _k-d Tree_:
|
||||
querying for the objects which are closest to a given point (i.e: the [Nearest
|
||||
neighbour search][nns]).
|
||||
|
||||
This is a more complicated algorithm, which will also need some modifications to
|
||||
the current _k-d Tree_ implementation in order to track just a bit more information
|
||||
about the points it contains.
|
||||
|
||||
[nns]: https://en.wikipedia.org/wiki/Nearest_neighbor_search
|
||||
|
||||
#### A notion of distance
|
||||
|
||||
To search for the closest points to a given origin, we first need to define
|
||||
which [distance](https://en.wikipedia.org/wiki/Distance) we are using in our
|
||||
space.
|
||||
|
||||
For this example, we'll simply be using the usual definition of [(Euclidean)
|
||||
distance][euclidean-distance].
|
||||
|
||||
[euclidean-distance]: https://en.wikipedia.org/wiki/Euclidean_distance
|
||||
|
||||
```python
|
||||
# Euclidean distance between `point` and `other`
def dist(point: Point, other: Point) -> float:
    # Pair up the coordinates of both points and sum their squared differences
    return sqrt(sum((a - b) ** 2 for a, b in zip(point, other)))
|
||||
```
|
||||
|
||||
#### Tracking the tree's boundaries
|
||||
|
||||
To make the query efficient, we'll need to track the tree's boundaries: the
|
||||
bounding box of all points contained therein. This will allow us to stop the
|
||||
search early once we've found enough points and can be sure that the rest of the
|
||||
tree is too far away to qualify.
|
||||
|
||||
For this, let's define the `AABB` (Axis-Aligned Bounding Box) class.
|
||||
|
||||
```python
|
||||
class Point(NamedTuple):
    # Convenience function to replace the coordinate along a given dimension
    def replace(self, axis: Axis, new_coord: float) -> Point:
        coords = list(self)
        coords[axis] = new_coord
        # `_make` builds a new tuple of the same type from an iterable of
        # coordinates (the constructor expects them as separate arguments)
        return self._make(coords)
|
||||
|
||||
# Axis-Aligned Bounding Box, described by its two extreme corners
class AABB(NamedTuple):
    # Lowest coordinates in the box
    low: Point
    # Highest coordinates in the box
    high: Point

    # An empty box
    @classmethod
    def empty(cls) -> AABB:
        # `low` starts at +inf and `high` at -inf, so that the first call to
        # `extend` snaps both corners onto the point being added
        return cls(
            Point(*(float("inf"),) * 3),
            Point(*(float("-inf"),) * 3),
        )

    # Split the box into two along a given axis for a given mid-point
    def split(self, axis: Axis, mid: float) -> tuple[AABB, AABB]:
        assert self.low[axis] <= mid <= self.high[axis]
        return (
            # Lower half: same `low` corner, `high` clamped to the plane
            AABB(self.low, self.high.replace(axis, mid)),
            # Upper half: `low` raised to the plane, same `high` corner
            AABB(self.low.replace(axis, mid), self.high),
        )

    # Extend a box to contain a given point
    def extend(self, point: Point) -> AABB:
        # `zip` pairs each corner coordinate with the point's coordinate,
        # `min`/`max` then picks the relevant one of each pair
        low = Point(*map(min, zip(self.low, point)))
        high = Point(*map(max, zip(self.high, point)))
        return AABB(low, high)

    # Return the shortest distance between a given point and the box
    def dist_to_point(self, point: Point) -> float:
        # Per-axis distance to the box: 0 when the coordinate lies within the
        # box's extent, otherwise the gap to the nearest face
        deltas = (
            max(self.low[axis] - point[axis], 0, point[axis] - self.high[axis])
            for axis in Axis
        )
        return dist(Point(0, 0, 0), Point(*deltas))
|
||||
```
|
||||
|
||||
And do the necessary modifications to the `KdTree` to store the bounding box and
|
||||
update it as we add new points.
|
||||
|
||||
```python
|
||||
class KdTree[T]:
|
||||
_root: KdNode[T]
|
||||
# New field: to keep track of the tree's boundaries
|
||||
_aabb: AABB
|
||||
|
||||
def __init__(self):
|
||||
self._root = KdNode()
|
||||
# Initialize the empty tree with an empty bounding box
|
||||
self._aabb = AABB.empty()
|
||||
|
||||
def insert(self, point: Point, val: T) -> bool:
|
||||
# Extend the AABB for our k-d Tree when adding a point to it
|
||||
self._aabb = self._aabb.extend(point)
|
||||
return self._root.insert(point, val, Axis.X)
|
||||
```
|
||||
|
||||
#### `MaxHeap`
|
||||
|
||||
Python's builtin [`heapq`][heapq] module provides the necessary functions to
|
||||
create and interact with a [_Priority Queue_][priority-queue], in the form of a
|
||||
[_Binary Heap_][binary-heap].
|
||||
|
||||
Unfortunately, Python's library maintains a _min-heap_, which keeps the minimum
|
||||
element at the root. For this algorithm, we're interested in having a
|
||||
_max-heap_, with the maximum at the root.
|
||||
|
||||
Thankfully, one can just reverse the comparison function for each element to
|
||||
convert between the two. Let's write a `MaxHeap` class making use of this
|
||||
library, with a `Reverse` wrapper class to reverse the order of elements
|
||||
contained within it (similar to [Rust's `Reverse`][reverse]).
|
||||
|
||||
[binary-heap]: https://en.wikipedia.org/wiki/Binary_heap
|
||||
[heapq]: https://docs.python.org/3/library/heapq.html
|
||||
[priority-queue]: https://en.wikipedia.org/wiki/Priority_queue
|
||||
[reverse]: https://doc.rust-lang.org/std/cmp/struct.Reverse.html
|
||||
|
||||
```python
|
||||
# Reverses the wrapped value's ordering
|
||||
@functools.total_ordering
|
||||
class Reverse[T]:
|
||||
value: T
|
||||
|
||||
def __init__(self, value: T):
|
||||
self.value = value
|
||||
|
||||
def __lt__(self, other: Reverse[T]) -> bool:
|
||||
return self.value > other.value
|
||||
|
||||
def __eq__(self, other: Reverse[T]) -> bool:
|
||||
return self.value == other.value
|
||||
|
||||
class MaxHeap[T]:
|
||||
_heap: list[Reverse[T]]
|
||||
|
||||
def __init__(self):
|
||||
self._heap = []
|
||||
|
||||
def __len__(self) -> int:
|
||||
return len(self._heap)
|
||||
|
||||
def __iter__(self) -> Iterator[T]:
|
||||
yield from (item.value for item in self._heap)
|
||||
|
||||
# Push a value on the heap
|
||||
def push(self, value: T) -> None:
|
||||
heapq.heappush(self._heap, Reverse(value))
|
||||
|
||||
# Peek at the current maximum value
|
||||
def peek(self) -> T:
|
||||
return self._heap[0].value
|
||||
|
||||
# Pop and return the highest value
|
||||
def pop(self) -> T:
|
||||
return heapq.heappop(self._heap).value
|
||||
|
||||
# Pushes a value onto the heap, pops and returns the highest value
|
||||
def pushpop(self, value: T) -> T:
|
||||
return heapq.heappushpop(self._heap, Reverse(value)).value
|
||||
```
|
||||
|
||||
#### The actual Implementation
|
||||
|
||||
Now that we have written the necessary building blocks, let's tackle the
|
||||
implementation of `closest` for our _k-d Tree_.
|
||||
|
||||
```python
|
||||
# Wrapper type for closest points, ordered by `distance`
|
||||
@dataclasses.dataclass(order=True)
|
||||
class ClosestPoint[T](NamedTuple):
|
||||
point: Point = field(compare=False)
|
||||
value: T = field(compare=False)
|
||||
distance: float
|
||||
|
||||
class KdTree[T]:
|
||||
def closest(self, point: Point, n: int = 1) -> list[ClosestPoint[T]]:
|
||||
assert n > 0
|
||||
# Create the output heap
|
||||
res = MaxHeap()
|
||||
# Recurse onto the root node
|
||||
self._root.closest(point, res, n, self._aabb)
|
||||
# Return the resulting list, from closest to farthest
|
||||
return sorted(res)
|
||||
|
||||
class KdNode[T]:
|
||||
def closest(
|
||||
self,
|
||||
point: Point,
|
||||
out: MaxHeap[ClosestPoint[T]],
|
||||
n: int,
|
||||
bounds: AABB,
|
||||
) -> None:
|
||||
# Forward to the wrapped node
|
||||
self.inner.closest(point, out, n, bounds)
|
||||
|
||||
class KdLeafNode[T]:
|
||||
def closest(
|
||||
self,
|
||||
point: Point,
|
||||
out: MaxHeap[ClosestPoint[T]],
|
||||
n: int,
|
||||
bounds: AABB,
|
||||
) -> None:
|
||||
# At the leaf, simply iterate over all points and add them to the heap
|
||||
for p, val in self.points.items():
|
||||
item = ClosestPoint(p, val, dist(p, point))
|
||||
if len(out) < n:
|
||||
# If the heap isn't full, just push
|
||||
out.push(item)
|
||||
elif out.peek().distance > item.distance:
|
||||
# Otherwise, push and pop to keep the heap at `n` elements
|
||||
out.pushpop(item)
|
||||
|
||||
class KdSplitNode[T]:
|
||||
def closest(
|
||||
self,
|
||||
point: Point,
|
||||
out: MaxHeap[ClosestPoint[T]],
|
||||
n: int,
|
||||
bounds: AABB,
|
||||
) -> None:
|
||||
index = self._index(point)
|
||||
children_bounds = bounds.split(self.axis, self.mid)
|
||||
# Iterate over the child which contains the point, then its neighbour
|
||||
for i in (index, 1 - index):
|
||||
child, bounds = self.children[i], children_bounds[i]
|
||||
# `min_dist` is 0 for the first child, and the minimum distance of
|
||||
# all points contained in the second child
|
||||
min_dist = bounds.dist_to_point(point)
|
||||
# If the heap is at capacity and the child to inspect too far, stop
|
||||
if len(out) == n and min_dist > out.peek().distance:
|
||||
return
|
||||
# Otherwise, recurse
|
||||
child.closest(point, out, n, bounds)
|
||||
```
|
||||
|
|
@ -1,112 +0,0 @@
|
|||
---
|
||||
title: "k-d Tree Revisited"
|
||||
date: 2024-08-17T14:20:22+01:00
|
||||
draft: false # I don't care for draft mode, git has branches for that
|
||||
description: "Simplifying the nearest neighbour search"
|
||||
tags:
|
||||
- algorithms
|
||||
- data structures
|
||||
- python
|
||||
categories:
|
||||
- programming
|
||||
series:
|
||||
- Cool algorithms
|
||||
favorite: false
|
||||
disable_feed: false
|
||||
---
|
||||
|
||||
After giving it a bit of thought, I've found a way to simplify the nearest
|
||||
neighbour search (i.e: the `closest` method) for the `KdTree` I implemented in
|
||||
[my previous post]({{< relref "../2024-08-10-kd-tree/index.md" >}}).
|
||||
|
||||
<!--more-->
|
||||
|
||||
## The improvement
|
||||
|
||||
That post implemented the nearest neighbour search by keeping track of the
|
||||
tree's boundaries (through `AABB`), and each of its sub-trees (through
|
||||
`AABB.split`), and testing for the early exit condition by computing the
|
||||
distance of the search's origin to each sub-tree's boundaries.
|
||||
|
||||
Instead of _explicitly_ keeping track of each sub-tree's boundaries, we can
|
||||
implicitly compute it when recursing down the tree.
|
||||
|
||||
To check for the distance between the queried point and the splitting plane of
|
||||
inner nodes: we simply need to project the origin onto that plane, thus giving
|
||||
us a minimal bound on the distance of the points stored on the other side.
|
||||
|
||||
This can be easily computed from the `axis` and `mid` values which are stored in
|
||||
the inner nodes: to project the point onto the plane we simply replace its
|
||||
coordinate for this axis by `mid`.
|
||||
|
||||
## Simplified search
|
||||
|
||||
With that out of the way, let's now see how `closest` can be implemented without
|
||||
needing to track the tree's `AABB` at the root:
|
||||
|
||||
```python
|
||||
# Wrapper type for closest points, ordered by `distance`
|
||||
@dataclasses.dataclass(order=True)
|
||||
class ClosestPoint[T](NamedTuple):
|
||||
point: Point = field(compare=False)
|
||||
value: T = field(compare=False)
|
||||
distance: float
|
||||
|
||||
class KdTree[T]:
|
||||
def closest(self, point: Point, n: int = 1) -> list[ClosestPoint[T]]:
|
||||
assert n > 0
|
||||
res = MaxHeap()
|
||||
# Instead of passing an `AABB`, we give an initial projection point,
|
||||
# the query origin itself (since we haven't visited any split node yet)
|
||||
self._root.closest(point, res, n, point)
|
||||
return sorted(res)
|
||||
|
||||
class KdNode[T]:
|
||||
def closest(
|
||||
self,
|
||||
point: Point,
|
||||
out: MaxHeap[ClosestPoint[T]],
|
||||
n: int,
|
||||
projection: Point,
|
||||
) -> None:
|
||||
# Same implementation
|
||||
self.inner.closest(point, out, n, bounds)
|
||||
|
||||
class KdLeafNode[T]:
|
||||
def closest(
|
||||
self,
|
||||
point: Point,
|
||||
out: MaxHeap[ClosestPoint[T]],
|
||||
n: int,
|
||||
projection: Point,
|
||||
) -> None:
|
||||
# Same implementation
|
||||
for p, val in self.points.items():
|
||||
item = ClosestPoint(p, val, dist(p, point))
|
||||
if len(out) < n:
|
||||
out.push(item)
|
||||
elif out.peek().distance > item.distance:
|
||||
out.pushpop(item)
|
||||
|
||||
class KdSplitNode[T]:
|
||||
def closest(
|
||||
self,
|
||||
point: Point,
|
||||
out: MaxHeap[ClosestPoint[T]],
|
||||
n: int,
|
||||
projection: Point,
|
||||
) -> None:
|
||||
index = self._index(point)
|
||||
self.children[index].closest(point, out, n, projection)
|
||||
# Project onto the splitting plane, for a minimum distance to its points
|
||||
projection = projection.replace(self.axis, self.mid)
|
||||
# If we're at capacity and can't possibly find any closer points, exit
|
||||
if len(out) == n and dist(point, projection) > out.peek().distance:
|
||||
return
|
||||
# Otherwise recurse on the other side to check for nearer neighbours
|
||||
self.children[1 - index].closest(point, out, n, projection)
|
||||
```
|
||||
|
||||
As you can see, the main difference is in `KdSplitNode`'s implementation, where
|
||||
we can quickly compute the minimum distance between the search's origin and all
|
||||
potential points in that subspace.
|
||||
|
|
@ -14,13 +14,3 @@ A few of my Amazon wish lists in case you want to give me a gift.
|
|||
* [Cooking & kitchenware](https://www.amazon.fr/hz/wishlist/ls/2MNRCLPNABZSU)
|
||||
* [Data Structures & Algorithms](https://www.amazon.fr/hz/wishlist/ls/2XZPQSBOGOFC3)
|
||||
* [Programming](https://www.amazon.fr/hz/wishlist/ls/1R4KFV4H2D8IF)
|
||||
* Other items:
|
||||
* [Chef's presses](https://www.thechefspress.com/shop)
|
||||
* [UK link](https://www.kitchenprovisions.co.uk/products/the-chefs-press-8oz-13oz)
|
||||
* [FR link](https://www.thekitchenlab.fr/fr/p/la-presse-du-chef-poids-de-friture-bruce-hill-13-oz/)
|
||||
* [Combustion Inc thermometer and
|
||||
display](https://combustion.inc/products/predictive-thermometer-gen2-wifi-display)
|
||||
* [Get the one with the range extender if you *really* want to spoil
|
||||
me](https://combustion.inc/products/predictive-thermometer-gen2-wifi-booster-wifi-display)
|
||||
* [Cannelés
|
||||
molds](https://www.laboetgato.fr/en/moules-a-canneles/13964-mould-for-canneles-non-polished-copper-o-45-mm-3333331010026.html)
|
||||
|
|
|
|||
83
flake.lock
generated
83
flake.lock
generated
|
|
@ -1,68 +1,28 @@
|
|||
{
|
||||
"nodes": {
|
||||
"flake-compat": {
|
||||
"flake": false,
|
||||
"locked": {
|
||||
"lastModified": 1696426674,
|
||||
"narHash": "sha256-kvjfFW7WAETZlt09AgDn1MrtKzP7t90Vf7vypd3OL1U=",
|
||||
"owner": "edolstra",
|
||||
"repo": "flake-compat",
|
||||
"rev": "0f9255e01c2351cc7d116c072cb317785dd33b33",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "edolstra",
|
||||
"repo": "flake-compat",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"futils": {
|
||||
"inputs": {
|
||||
"systems": "systems"
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1710146030,
|
||||
"narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=",
|
||||
"lastModified": 1622445595,
|
||||
"narHash": "sha256-m+JRe6Wc5OZ/mKw2bB3+Tl0ZbtyxxxfnAWln8Q5qs+Y=",
|
||||
"owner": "numtide",
|
||||
"repo": "flake-utils",
|
||||
"rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a",
|
||||
"rev": "7d706970d94bc5559077eb1a6600afddcd25a7c8",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "numtide",
|
||||
"ref": "main",
|
||||
"ref": "master",
|
||||
"repo": "flake-utils",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"gitignore": {
|
||||
"inputs": {
|
||||
"nixpkgs": [
|
||||
"pre-commit-hooks",
|
||||
"nixpkgs"
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1709087332,
|
||||
"narHash": "sha256-HG2cCnktfHsKV0s4XW83gU3F57gaTljL9KNSuG6bnQs=",
|
||||
"owner": "hercules-ci",
|
||||
"repo": "gitignore.nix",
|
||||
"rev": "637db329424fd7e46cf4185293b9cc8c88c95394",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "hercules-ci",
|
||||
"repo": "gitignore.nix",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"nixpkgs": {
|
||||
"locked": {
|
||||
"lastModified": 1722415718,
|
||||
"narHash": "sha256-5US0/pgxbMksF92k1+eOa8arJTJiPvsdZj9Dl+vJkM4=",
|
||||
"lastModified": 1628320020,
|
||||
"narHash": "sha256-4xBEb+TOHyIGpK37EVsZx6dGPwNMf5YWNBJaQ4VyZws=",
|
||||
"owner": "NixOS",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "c3392ad349a5227f4a3464dce87bcc5046692fce",
|
||||
"rev": "67c80531be622641b5b2ccc3a7aff355cb02476b",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
|
|
@ -74,21 +34,19 @@
|
|||
},
|
||||
"pre-commit-hooks": {
|
||||
"inputs": {
|
||||
"flake-compat": "flake-compat",
|
||||
"gitignore": "gitignore",
|
||||
"nixpkgs": [
|
||||
"nixpkgs"
|
||||
"flake-utils": [
|
||||
"futils"
|
||||
],
|
||||
"nixpkgs-stable": [
|
||||
"nixpkgs": [
|
||||
"nixpkgs"
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1721042469,
|
||||
"narHash": "sha256-6FPUl7HVtvRHCCBQne7Ylp4p+dpP3P/OYuzjztZ4s70=",
|
||||
"lastModified": 1621411868,
|
||||
"narHash": "sha256-R+7OQ2JYFCb3E7Jl7LhRifzMVCR6Gl8R98zYsNhZtJ8=",
|
||||
"owner": "cachix",
|
||||
"repo": "pre-commit-hooks.nix",
|
||||
"rev": "f451c19376071a90d8c58ab1a953c6e9840527fd",
|
||||
"rev": "2e7fac06108b4fc81f5ff9ed9a02bc4f6ede7001",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
|
|
@ -104,21 +62,6 @@
|
|||
"nixpkgs": "nixpkgs",
|
||||
"pre-commit-hooks": "pre-commit-hooks"
|
||||
}
|
||||
},
|
||||
"systems": {
|
||||
"locked": {
|
||||
"lastModified": 1681028828,
|
||||
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
|
||||
"owner": "nix-systems",
|
||||
"repo": "default",
|
||||
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "nix-systems",
|
||||
"repo": "default",
|
||||
"type": "github"
|
||||
}
|
||||
}
|
||||
},
|
||||
"root": "root",
|
||||
|
|
|
|||
20
flake.nix
20
flake.nix
|
|
@ -6,7 +6,7 @@
|
|||
type = "github";
|
||||
owner = "numtide";
|
||||
repo = "flake-utils";
|
||||
ref = "main";
|
||||
ref = "master";
|
||||
};
|
||||
|
||||
nixpkgs = {
|
||||
|
|
@ -22,8 +22,8 @@
|
|||
repo = "pre-commit-hooks.nix";
|
||||
ref = "master";
|
||||
inputs = {
|
||||
flake-utils.follows = "futils";
|
||||
nixpkgs.follows = "nixpkgs";
|
||||
nixpkgs-stable.follows = "nixpkgs";
|
||||
};
|
||||
};
|
||||
};
|
||||
|
|
@ -61,17 +61,15 @@
|
|||
};
|
||||
};
|
||||
|
||||
devShells = {
|
||||
default = pkgs.mkShell {
|
||||
name = "blog";
|
||||
devShell = pkgs.mkShell {
|
||||
name = "blog";
|
||||
|
||||
buildInputs = with pkgs; [
|
||||
gnumake
|
||||
hugo
|
||||
];
|
||||
buildInputs = with pkgs; [
|
||||
gnumake
|
||||
hugo
|
||||
];
|
||||
|
||||
inherit (self.checks.${system}.pre-commit) shellHook;
|
||||
};
|
||||
inherit (self.checks.${system}.pre-commit) shellHook;
|
||||
};
|
||||
}
|
||||
);
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
series:
|
||||
other: "series"
|
||||
serie:
|
||||
other: "serie"
|
||||
|
||||
Series:
|
||||
other: "Series"
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
series:
|
||||
serie:
|
||||
other: "série"
|
||||
|
||||
Series:
|
||||
|
|
|
|||
|
|
@ -11,7 +11,6 @@
|
|||
<a data-hint="Sourcehut" title="Sourcehut" href="https://sr.ht/~{{ .Site.Author.sourcehut }}" target="_blank" rel="me"> Sourcehut </a>
|
||||
<a data-hint="LinkedIn" title="LinkedIn" href="https://www.linkedin.com/in/{{ .Site.Author.linkedin }}" target="_blank" rel="me"> LinkedIn </a>
|
||||
<a data-hint="Matrix" title="Matrix" href="https://matrix.to/#/{{ .Site.Author.matrix }}" target="_blank" rel="me"> Matrix </a>
|
||||
<a data-hint="Mastodon" title="Mastodon" href="https://{{ .Site.Author.mastodon }}" target="_blank" rel="me"> Mastodon </a>
|
||||
<a rel="pgpkey" href="https://key.belanyi.fr/key.pgp"> PGP </a>
|
||||
<link rel="authorization_endpoint" href="https://indieauth.com/auth">
|
||||
<p>
|
||||
|
|
|
|||
|
|
@ -3,30 +3,6 @@
|
|||
<link rel="stylesheet" type="text/css" href="https://tikzjax.com/v1/fonts.css">
|
||||
<script async src="https://tikzjax.com/v1/tikzjax.js"></script>
|
||||
{{ end }}
|
||||
<!-- Graphviz support -->
|
||||
{{ if (.Params.graphviz) }}
|
||||
<script src="https://cdn.jsdelivr.net/npm/@viz-js/viz@3.7.0/lib/viz-standalone.min.js"></script>
|
||||
<script type="text/javascript">
|
||||
(function() {
|
||||
Viz.instance().then(function(viz) {
|
||||
Array.prototype.forEach.call(document.querySelectorAll("pre.graphviz"), function(x) {
|
||||
var svg = viz.renderSVGElement(x.innerText);
|
||||
// Let CSS take care of the SVG size
|
||||
svg.removeAttribute("width")
|
||||
svg.setAttribute("height", "auto")
|
||||
x.replaceChildren(svg)
|
||||
})
|
||||
})
|
||||
})();
|
||||
</script>
|
||||
{{ end }}
|
||||
<!-- Mermaid support -->
|
||||
{{ if (.Params.mermaid) }}
|
||||
<script type="module" async>
|
||||
import mermaid from "https://cdn.jsdelivr.net/npm/mermaid@latest/dist/mermaid.esm.min.mjs";
|
||||
mermaid.initialize({ startOnLoad: true });
|
||||
</script>
|
||||
{{ end }}
|
||||
{{ with .OutputFormats.Get "atom" -}}
|
||||
{{ printf `<link rel="%s" type="%s" href="%s" title="%s" />` .Rel .MediaType.Type .Permalink $.Site.Title | safeHTML }}
|
||||
{{ end -}}
|
||||
|
|
|
|||
|
|
@ -1,16 +0,0 @@
|
|||
<pre class="graphviz">
|
||||
{{ with .Get "file" }}
|
||||
{{ if eq (. | printf "%.1s") "/" }}
|
||||
{{/* Absolute path are from root of site. */}}
|
||||
{{ $.Scratch.Set "filepath" . }}
|
||||
{{ else }}
|
||||
{{/* Relative paths are from page directory. */}}
|
||||
{{ $.Scratch.Set "filepath" $.Page.File.Dir }}
|
||||
{{ $.Scratch.Add "filepath" . }}
|
||||
{{ end }}
|
||||
|
||||
{{ $.Scratch.Get "filepath" | readFile }}
|
||||
{{ else }}
|
||||
{{.Inner}}
|
||||
{{ end }}
|
||||
</pre>
|
||||
|
|
@ -1,16 +0,0 @@
|
|||
<pre class="mermaid">
|
||||
{{ with .Get "file" }}
|
||||
{{ if eq (. | printf "%.1s") "/" }}
|
||||
{{/* Absolute path are from root of site. */}}
|
||||
{{ $.Scratch.Set "filepath" . }}
|
||||
{{ else }}
|
||||
{{/* Relative paths are from page directory. */}}
|
||||
{{ $.Scratch.Set "filepath" $.Page.File.Dir }}
|
||||
{{ $.Scratch.Add "filepath" . }}
|
||||
{{ end }}
|
||||
|
||||
{{ $.Scratch.Get "filepath" | readFile }}
|
||||
{{ else }}
|
||||
{{.Inner}}
|
||||
{{ end }}
|
||||
</pre>
|
||||
|
|
@ -1,16 +1,3 @@
|
|||
<script type="text/tikz">
|
||||
{{ with .Get "file" }}
|
||||
{{ if eq (. | printf "%.1s") "/" }}
|
||||
{{/* Absolute path are from root of site. */}}
|
||||
{{ $.Scratch.Set "filepath" . }}
|
||||
{{ else }}
|
||||
{{/* Relative paths are from page directory. */}}
|
||||
{{ $.Scratch.Set "filepath" $.Page.File.Dir }}
|
||||
{{ $.Scratch.Add "filepath" . }}
|
||||
{{ end }}
|
||||
|
||||
{{ $.Scratch.Get "filepath" | readFile }}
|
||||
{{ else }}
|
||||
{{.Inner}}
|
||||
{{ end }}
|
||||
{{.Inner}}
|
||||
</script>
|
||||
|
|
|
|||
|
|
@ -1 +1 @@
|
|||
Subproject commit d77e0d6d552ad135138f1f9443ef30cc5af8f0f3
|
||||
Subproject commit 5dab60e04a37896c09a32137aefe821c63b3af04
|
||||
|
|
@ -1 +1 @@
|
|||
Subproject commit d545effed9949bf834eaed09ad423ec3e030794f
|
||||
Subproject commit 5da913dc46d2dadcaf6548256238c58c504476de
|
||||
Loading…
Add table
Add a link
Reference in a new issue