Compare commits
1 commit
main
...
ee916a506b
Author | SHA1 | Date | |
---|---|---|---|
Bruno BELANYI | ee916a506b |
64
.drone.jsonnet
Normal file
64
.drone.jsonnet
Normal file
|
@ -0,0 +1,64 @@
|
||||||
|
local Pipeline(isDev) = {
|
||||||
|
kind: "pipeline",
|
||||||
|
name: if isDev then "deploy-dev" else "deploy-prod",
|
||||||
|
# Dev ignores "main", prod only triggers on "main"
|
||||||
|
trigger: { branch: { [if isDev then "exclude" else "include"]: [ "main" ] } },
|
||||||
|
# We want to clone the submodules, which isn't done by default
|
||||||
|
clone: { disable: true },
|
||||||
|
steps: [
|
||||||
|
{
|
||||||
|
name: "clone",
|
||||||
|
image: "plugins/git",
|
||||||
|
recursive: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "markdownlint",
|
||||||
|
image: "06kellyjac/markdownlint-cli",
|
||||||
|
commands: [
|
||||||
|
"markdownlint --version",
|
||||||
|
"markdownlint content/",
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "build",
|
||||||
|
image: "klakegg/hugo",
|
||||||
|
commands: [
|
||||||
|
"hugo version",
|
||||||
|
# If dev, include drafts and future articles, change base URL
|
||||||
|
"hugo --minify" + if isDev then " -D -F -b https://dev.belanyi.fr" else "",
|
||||||
|
],
|
||||||
|
[if !isDev then "environment"]: { HUGO_ENV: "production" }
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "deploy",
|
||||||
|
image: "appleboy/drone-scp",
|
||||||
|
settings: {
|
||||||
|
source: "public/*",
|
||||||
|
strip_components: 1, # Remove 'public/' prefix from file paths
|
||||||
|
rm: true, # Remove previous files from target directory
|
||||||
|
host: { from_secret: "ssh_host" },
|
||||||
|
target: { from_secret: "ssh_target" + if isDev then "_dev" else "" },
|
||||||
|
username: { from_secret: "ssh_user" },
|
||||||
|
key: { from_secret: "ssh_key" },
|
||||||
|
port: { from_secret: "ssh_port" },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "notify",
|
||||||
|
image: "plugins/matrix",
|
||||||
|
settings: {
|
||||||
|
homeserver: { from_secret: "matrix_homeserver" },
|
||||||
|
roomid: { from_secret: "matrix_roomid" },
|
||||||
|
username: { from_secret: "matrix_username" },
|
||||||
|
password: { from_secret: "matrix_password" },
|
||||||
|
},
|
||||||
|
trigger: { status: [ "failure", "success", ] },
|
||||||
|
},
|
||||||
|
]
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
[
|
||||||
|
Pipeline(false),
|
||||||
|
Pipeline(true),
|
||||||
|
]
|
7
.envrc
7
.envrc
|
@ -1 +1,8 @@
|
||||||
|
use_flake() {
|
||||||
|
watch_file flake.nix
|
||||||
|
watch_file flake.lock
|
||||||
|
eval "$(nix print-dev-env)"
|
||||||
|
}
|
||||||
|
|
||||||
use flake
|
use flake
|
||||||
|
eval "$shellHooks"
|
||||||
|
|
|
@ -1,3 +0,0 @@
|
||||||
# MD024/no-duplicate-heading/no-duplicate-header
|
|
||||||
MD024:
|
|
||||||
siblings_only: true
|
|
|
@ -1,64 +0,0 @@
|
||||||
labels:
|
|
||||||
backend: local
|
|
||||||
|
|
||||||
matrix:
|
|
||||||
include:
|
|
||||||
- TYPE: dev
|
|
||||||
MAKE_TARGET: build-dev
|
|
||||||
SSH_TARGET: ssh_target_dev
|
|
||||||
- TYPE: prod
|
|
||||||
MAKE_TARGET: build-prod
|
|
||||||
SSH_TARGET: ssh_target
|
|
||||||
|
|
||||||
# Run the correct matrix build on the correct branch
|
|
||||||
when:
|
|
||||||
evaluate: |
|
|
||||||
((CI_COMMIT_BRANCH == CI_REPO_DEFAULT_BRANCH) == ("${TYPE}" == "prod"))
|
|
||||||
|
|
||||||
steps:
|
|
||||||
- name: check
|
|
||||||
image: bash
|
|
||||||
commands:
|
|
||||||
- nix flake check
|
|
||||||
|
|
||||||
- name: build (${TYPE})
|
|
||||||
image: bash
|
|
||||||
commands:
|
|
||||||
# If dev, include drafts and future articles, change base URL
|
|
||||||
- nix develop -c make ${MAKE_TARGET}
|
|
||||||
|
|
||||||
- name: deploy (${TYPE})
|
|
||||||
image: bash
|
|
||||||
environment:
|
|
||||||
# Trailing slash to synchronize the folder's *content* to the target
|
|
||||||
SYNC_SOURCE: public/
|
|
||||||
SYNC_KEY:
|
|
||||||
from_secret: ssh_key
|
|
||||||
SYNC_PORT:
|
|
||||||
from_secret: ssh_port
|
|
||||||
SYNC_TARGET:
|
|
||||||
from_secret: ${SSH_TARGET}
|
|
||||||
SYNC_USERNAME:
|
|
||||||
from_secret: ssh_user
|
|
||||||
SYNC_HOST:
|
|
||||||
from_secret: ssh_host
|
|
||||||
commands:
|
|
||||||
- "nix run github:ambroisie/nix-config#drone-rsync"
|
|
||||||
|
|
||||||
- name: notify
|
|
||||||
image: bash
|
|
||||||
environment:
|
|
||||||
ADDRESS:
|
|
||||||
from_secret: matrix_homeserver
|
|
||||||
ROOM:
|
|
||||||
from_secret: matrix_roomid
|
|
||||||
USER:
|
|
||||||
from_secret: matrix_username
|
|
||||||
PASS:
|
|
||||||
from_secret: matrix_password
|
|
||||||
commands:
|
|
||||||
- nix run github:ambroisie/matrix-notifier
|
|
||||||
when:
|
|
||||||
status:
|
|
||||||
- failure
|
|
||||||
- success
|
|
7
Makefile
7
Makefile
|
@ -3,7 +3,7 @@ all: build-dev
|
||||||
|
|
||||||
.PHONY: build-dev
|
.PHONY: build-dev
|
||||||
build-dev:
|
build-dev:
|
||||||
HUGO_TITLE="Ambroisie's dev blog" HUGO_BASEURL=https://dev.belanyi.fr hugo -D -F
|
HUGO_BASEURL=https://dev.belanyi.fr hugo -D -F
|
||||||
|
|
||||||
.PHONY: build-prod
|
.PHONY: build-prod
|
||||||
build-prod:
|
build-prod:
|
||||||
|
@ -13,6 +13,11 @@ build-prod:
|
||||||
serve:
|
serve:
|
||||||
hugo server -D -F
|
hugo server -D -F
|
||||||
|
|
||||||
|
deploy:
|
||||||
|
@if [ -n "$$KEY" ]; then eval "$$(ssh-agent)"; echo "$$KEY" | ssh-add -; fi
|
||||||
|
if [ -z "$$USERNAME" ] || [ -z "$$SSH_HOST" ] || [ -z "$$TARGET" ]; then exit 1; fi
|
||||||
|
rsync --progress -avz --delete public/ "$$USERNAME@$$SSH_HOST:$$TARGET"
|
||||||
|
|
||||||
.PHONY: clean
|
.PHONY: clean
|
||||||
clean:
|
clean:
|
||||||
$(RM) -r public
|
$(RM) -r public
|
||||||
|
|
|
@ -5,18 +5,15 @@ draft: false # I don't care for draft mode, git has branches for that
|
||||||
description: ""
|
description: ""
|
||||||
tags:
|
tags:
|
||||||
- accounting
|
- accounting
|
||||||
- algorithms
|
|
||||||
- c++
|
- c++
|
||||||
- ci/cd
|
- ci/cd
|
||||||
- cli
|
- cli
|
||||||
- data structures
|
|
||||||
- design-pattern
|
- design-pattern
|
||||||
- docker
|
- docker
|
||||||
- drone
|
- drone
|
||||||
- git
|
- git
|
||||||
- hugo
|
- hugo
|
||||||
- nix
|
- nix
|
||||||
- python
|
|
||||||
- self-hosting
|
- self-hosting
|
||||||
- test
|
- test
|
||||||
categories:
|
categories:
|
||||||
|
|
16
config.yaml
16
config.yaml
|
@ -6,6 +6,8 @@ theme:
|
||||||
- "hugo-atom-feed"
|
- "hugo-atom-feed"
|
||||||
- "anubis"
|
- "anubis"
|
||||||
paginate: 5
|
paginate: 5
|
||||||
|
disqusShortname: ""
|
||||||
|
googleAnalytics: ""
|
||||||
enableRobotsTXT: true
|
enableRobotsTXT: true
|
||||||
enableEmoji: true
|
enableEmoji: true
|
||||||
|
|
||||||
|
@ -35,12 +37,11 @@ menu:
|
||||||
author:
|
author:
|
||||||
name: "Bruno BELANYI"
|
name: "Bruno BELANYI"
|
||||||
email: "contact-blog@belanyi.fr"
|
email: "contact-blog@belanyi.fr"
|
||||||
github: "ambroisie"
|
github: "Ambroisie"
|
||||||
gitlab: "ambroisie"
|
gitlab: "Ambroisie"
|
||||||
sourcehut: "ambroisie"
|
sourcehut: "ambroisie"
|
||||||
linkedin: "bruno-belanyi"
|
linkedin: "bruno-belanyi"
|
||||||
matrix: "@ambroisie:belanyi.fr"
|
matrix: "@ambroisie:belanyi.fr"
|
||||||
mastodon: "nixos.paris/@ambroisie"
|
|
||||||
|
|
||||||
permalinks:
|
permalinks:
|
||||||
posts: /:year/:month/:day/:title/
|
posts: /:year/:month/:day/:title/
|
||||||
|
@ -65,18 +66,11 @@ params:
|
||||||
webmentions:
|
webmentions:
|
||||||
login: belanyi.fr
|
login: belanyi.fr
|
||||||
pingback: true
|
pingback: true
|
||||||
mathjax: true
|
|
||||||
|
|
||||||
services:
|
|
||||||
disqus:
|
|
||||||
shortname: ""
|
|
||||||
googleAnalytics:
|
|
||||||
ID: ""
|
|
||||||
|
|
||||||
taxonomies:
|
taxonomies:
|
||||||
category: "categories"
|
category: "categories"
|
||||||
tag: "tags"
|
tag: "tags"
|
||||||
series: "series"
|
serie: "series"
|
||||||
|
|
||||||
markup:
|
markup:
|
||||||
goldmark:
|
goldmark:
|
||||||
|
|
|
@ -4,13 +4,7 @@ description: "About me"
|
||||||
date: 2020-07-14
|
date: 2020-07-14
|
||||||
---
|
---
|
||||||
|
|
||||||
I'm currently working as a Senior Software Engineer at [Google][google], as part
|
I'm a CS student at EPITA.
|
||||||
of their Embedded Graphics Drivers team for Pixel devices.
|
|
||||||
|
|
||||||
[google]: https://www.linkedin.com/company/google/mycompany/verification/
|
|
||||||
|
|
||||||
You can find my CV [here](https://cv.belanyi.fr/en.pdf). Or
|
You can find my CV [here](https://cv.belanyi.fr/en.pdf). Or
|
||||||
[here](https://cv.belanyi.fr/fr.pdf) for the french version.
|
[here](https://cv.belanyi.fr/fr.pdf) for the french version.
|
||||||
|
|
||||||
If you are a friend or family, you might be looking for my [wish
|
|
||||||
lists]({{< ref "wish-lists.md" >}}) to find some present ideas.
|
|
||||||
|
|
|
@ -1,172 +0,0 @@
|
||||||
---
|
|
||||||
title: "Magic Conversions in C++"
|
|
||||||
date: 2021-10-01T14:46:14+02:00
|
|
||||||
draft: false # I don't care for draft mode, git has branches for that
|
|
||||||
description: "How to get the compiler to infer the correct conversion"
|
|
||||||
tags:
|
|
||||||
- c++
|
|
||||||
- design-pattern
|
|
||||||
categories:
|
|
||||||
- programming
|
|
||||||
series:
|
|
||||||
favorite: false
|
|
||||||
disable_feed: false
|
|
||||||
---
|
|
||||||
|
|
||||||
One feature that I like a lot in [Rust][rust-lang] is return type polymorphism,
|
|
||||||
best exemplified with the following snippet of code:
|
|
||||||
|
|
||||||
```rust
|
|
||||||
use std::collections::HashSet;
|
|
||||||
|
|
||||||
fn main() {
|
|
||||||
let vec: Vec<_> = (0..10).filter(|a| a % 2 == 0).collect();
|
|
||||||
let set: HashSet<_> = (0..10).filter(|a| a % 2 == 0).collect();
|
|
||||||
println!("vec: {:?}", vec);
|
|
||||||
println!("set: {:?}", set);
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
We have the same expression (`(0..10).filter(|a| a % 2 == 0).collect()`) that
|
|
||||||
results in two totally different types of values (a `Vec` and a `HashSet`)!
|
|
||||||
|
|
||||||
This is because Rust allows you to write a function which is generic in its
|
|
||||||
*return type*, which is a super-power that C++ does not have. But is there a way
|
|
||||||
to emulate this behaviour with some clever code?
|
|
||||||
|
|
||||||
[rust-lang]: https://rust-lang.org/
|
|
||||||
<!--more-->
|
|
||||||
|
|
||||||
## The problem
|
|
||||||
|
|
||||||
For the purposes of this article, the problem that I am trying to solve will be
|
|
||||||
the following:
|
|
||||||
|
|
||||||
```c++
|
|
||||||
void takes_small_array(std::array<char, 32> arr);
|
|
||||||
void takes_big_array(std::array<char, 4096> arr);
|
|
||||||
|
|
||||||
// How to define a `to_array` function so that the following works?
|
|
||||||
void test(std::string_view s) {
|
|
||||||
takes_small_array(to_array(s));
|
|
||||||
takes_big_array(to_array(s));
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
## First attempt
|
|
||||||
|
|
||||||
If we try to solve this in a way similar to Rust, we hit a problem in what the
|
|
||||||
language allows us to write:
|
|
||||||
|
|
||||||
```c++
|
|
||||||
std::array<char, 32> to_array(std::string_view s) {
|
|
||||||
std::array<char, 32> ret;
|
|
||||||
std::copy(s.begin(), s.end(), ret.begin());
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::array<char, 4096> to_array(std::string_view s) {
|
|
||||||
std::array<char, 4096> ret;
|
|
||||||
std::copy(s.begin(), s.end(), ret.begin());
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
The compiler complains with the following error:
|
|
||||||
|
|
||||||
```none
|
|
||||||
ambiguating new declaration of 'std::array<char, 4096> to_array(std::string_view)'
|
|
||||||
note: old declaration 'std::array<char, 32> to_array(std::string_view)'
|
|
||||||
```
|
|
||||||
|
|
||||||
That is because C++ does **not** allow you to write an overload set based on
|
|
||||||
*return type only*.
|
|
||||||
|
|
||||||
## Using templates
|
|
||||||
|
|
||||||
For our second try, we want to use *non-type template parameters* to solve the
|
|
||||||
issue. We write the following:
|
|
||||||
|
|
||||||
```c++
|
|
||||||
template <size_t N>
|
|
||||||
std::array<char, N> to_array(std::string_view s) {
|
|
||||||
std::array<char, N> ret;
|
|
||||||
std::copy(s.begin(), s.end(), ret.begin());
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
The compiler does not complain when we write this! We have also solved two minor
|
|
||||||
issues with the previous try: the sizes of the arrays are not hard-coded, and we
|
|
||||||
kept the code DRY.
|
|
||||||
|
|
||||||
However we have some trouble trying to use those functions as stated in the
|
|
||||||
beginning of the problem, with the following error message:
|
|
||||||
|
|
||||||
```none
|
|
||||||
error: no matching function for call to 'to_array(std::string_view&)'
|
|
||||||
| takes_small_array(to_array(s));
|
|
||||||
note: candidate: 'template<size_t N> std::array<char, N> to_array(std::string_view)'
|
|
||||||
| std::array<char, N> to_array(std::string_view s) {
|
|
||||||
note: template argument deduction/substitution failed:
|
|
||||||
note: couldn't deduce template parameter 'N'
|
|
||||||
```
|
|
||||||
|
|
||||||
The compiler cannot deduce the size of the array we want to use! We could solve
|
|
||||||
the issue by explicitly giving a size when calling the function
|
|
||||||
(`to_array<32>(s)`) however this is unsatisfactory: we are not solving the
|
|
||||||
problem as stated initially, which could for example lead to needless churning
|
|
||||||
if we change the signature of `takes_small_array` to instead use
|
|
||||||
`std::array<char, 64>`.
|
|
||||||
|
|
||||||
Thankfully there is a way to use the compiler to our advantage, and have it
|
|
||||||
deduce it for us, but it involves some trickery.
|
|
||||||
|
|
||||||
## The solution
|
|
||||||
|
|
||||||
We want to write a function that resolves the previous two issues we
|
|
||||||
experienced:
|
|
||||||
|
|
||||||
* The non-type template parameter must be deduced by the end of the call to
|
|
||||||
`to_array`, but we can only deduce it once it is being consumed by
|
|
||||||
`takes_{small,big}_array` -- which is too late for the compiler.
|
|
||||||
* We cannot overload on the return type, which means we must return a single
|
|
||||||
type from the function.
|
|
||||||
|
|
||||||
The goal is to delay *when* the deduction of the array's size is happening,
|
|
||||||
which can be done by using a *templated conversion operator*.
|
|
||||||
|
|
||||||
So the solution to our problem is to do the following:
|
|
||||||
|
|
||||||
```c++
|
|
||||||
class ToArray {
|
|
||||||
std::string_view s_;
|
|
||||||
|
|
||||||
public:
|
|
||||||
ToArray(std::string_view s) : s_(s) {}
|
|
||||||
|
|
||||||
template <size_t N>
|
|
||||||
operator std::array<char, N>() const {
|
|
||||||
std::array<char, N> ret;
|
|
||||||
std::copy(s_.begin(), s_.end(), ret.begin());
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
ToArray to_array(std::string_view s) {
|
|
||||||
return ToArray{s};
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
The following steps happen when trying to call `takes_small_array(to_array(s))`:
|
|
||||||
|
|
||||||
* `to_array(s)` returns a `ToArray` value.
|
|
||||||
* the `ToArray` value is not an `array<char, 32>`, but has an implicit
|
|
||||||
conversion operator, which the compiler invokes.
|
|
||||||
* `takes_small_array` is called with the converted `array<char, 32>` value.
|
|
||||||
|
|
||||||
We now have a "magic" function which can convert a `string_view` to an
|
|
||||||
`std::array` of characters of any size. We could further improve this by
|
|
||||||
ensuring that the array is terminated with a `'\0'`, throwing an exception when
|
|
||||||
the array is too small for the given string, etc... This is left as an exercise
|
|
||||||
to the reader.
|
|
|
@ -1,329 +0,0 @@
|
||||||
---
|
|
||||||
title: "Multiple Dispatch in C++"
|
|
||||||
date: 2022-11-02T16:36:53+01:00
|
|
||||||
draft: false # I don't care for draft mode, git has branches for that
|
|
||||||
description: "A Lisp super-power in C++"
|
|
||||||
tags:
|
|
||||||
- c++
|
|
||||||
- design-pattern
|
|
||||||
categories:
|
|
||||||
- programming
|
|
||||||
series:
|
|
||||||
favorite: false
|
|
||||||
disable_feed: false
|
|
||||||
---
|
|
||||||
|
|
||||||
A great feature that can be used in more dynamic languages is *multiple
|
|
||||||
dispatch*. Here's an example in [Julia][julia-lang] taken from the [Wikipedia
|
|
||||||
article][wiki-multiple-dispatch].
|
|
||||||
|
|
||||||
```julia
|
|
||||||
abstract type SpaceObject end
|
|
||||||
|
|
||||||
struct Asteroid <: SpaceObject
|
|
||||||
# Asteroid fields
|
|
||||||
end
|
|
||||||
struct Spaceship <: SpaceObject
|
|
||||||
# Spaceship fields
|
|
||||||
end
|
|
||||||
|
|
||||||
collide_with(::Asteroid, ::Spaceship) = # Asteroid/Spaceship collision
|
|
||||||
collide_with(::Spaceship, ::Asteroid) = # Spaceship/Asteroid collision
|
|
||||||
collide_with(::Spaceship, ::Spaceship) = # Spaceship/Spaceship collision
|
|
||||||
collide_with(::Asteroid, ::Asteroid) = # Asteroid/Asteroid collision
|
|
||||||
|
|
||||||
collide(x::SpaceObject, y::SpaceObject) = collide_with(x, y)
|
|
||||||
```
|
|
||||||
|
|
||||||
The `collide` function calls `collide_with` which, at runtime, will inspect the
|
|
||||||
types of its arguments and *dispatch* to the appropriate implementation.
|
|
||||||
|
|
||||||
Julia was created with multiple dispatch as a first-class citizen, and it is used
|
|
||||||
liberally in its ecosystem. C++ does not have access to such a feature natively,
|
|
||||||
but there are alternatives that I will be presenting in this article, and try to
|
|
||||||
justify their uses and limitations.
|
|
||||||
|
|
||||||
[julia-lang]: https://julialang.org/
|
|
||||||
[wiki-multiple-dispatch]: https://en.wikipedia.org/wiki/Multiple_dispatch
|
|
||||||
<!--more-->
|
|
||||||
|
|
||||||
## Single dispatch
|
|
||||||
|
|
||||||
The native way to perform dynamic dispatch in C++ is through the
|
|
||||||
use of *virtual methods*, which allows an object to *override* the behaviour of
|
|
||||||
one of its super-classes' method.
|
|
||||||
|
|
||||||
Invoking a virtual method will perform *single dispatch*, on the dynamic type
|
|
||||||
of the object whose method is being called.
|
|
||||||
|
|
||||||
Here is an example:
|
|
||||||
|
|
||||||
```cpp
|
|
||||||
struct SpaceObject {
|
|
||||||
virtual ~SpaceObject() = default;
|
|
||||||
|
|
||||||
// Pure virtual method, which must be overridden by non-abstract sub-classes
|
|
||||||
virtual void impact() = 0;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct Asteroid : SpaceObject {
|
|
||||||
// Override the method for asteroid impacts
|
|
||||||
void impact() override {
|
|
||||||
std::cout << "Bang!\n";
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
struct Spaceship : SpaceObject {
|
|
||||||
// Override the method for spaceship impacts
|
|
||||||
void impact() override {
|
|
||||||
std::cout << "Crash!\n";
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
int main() {
|
|
||||||
std::unique_ptr<SpaceObject> object = std::make_unique<Spaceship>();
|
|
||||||
object->impact(); // Prints "Crash!"
|
|
||||||
|
|
||||||
object = std::make_unique<Asteroid>();
|
|
||||||
object->impact(); // Prints "Bang!"
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
Virtual methods are great when you want to represent a common set of behaviour
|
|
||||||
(an *interface*), and be able to substitute various types with their specific
|
|
||||||
implementation.
|
|
||||||
|
|
||||||
For example, a dummy file-system interface might look like the following:
|
|
||||||
|
|
||||||
```cpp
|
|
||||||
struct Filesystem {
|
|
||||||
virtual void write(std::string_view filename, std::span<char> data) = 0;
|
|
||||||
virtual std::vector<char> read(std::string_view filename) = 0;
|
|
||||||
virtual void delete(std::string_view filename) = 0;
|
|
||||||
};
|
|
||||||
```
|
|
||||||
|
|
||||||
You can then write `PosixFilesystem` which makes use of the POSIX API and
|
|
||||||
interacts with actual on-disk data, `MockFilesystem` which only works in-memory
|
|
||||||
and can be used for testing, etc...
|
|
||||||
|
|
||||||
## Double dispatch through the Visitor pattern
|
|
||||||
|
|
||||||
Sometimes single dispatch is not enough, such as in the collision example at the
|
|
||||||
beginning of this article. In cases where a computation depends on the dynamic
|
|
||||||
type of *two* of its values, we can make use of double-dispatch by leveraging
|
|
||||||
the Visitor design pattern. This is done by calling a virtual method on the
|
|
||||||
first value, which itself will call a virtual method on the second value.
|
|
||||||
|
|
||||||
Here's a commentated example:
|
|
||||||
|
|
||||||
```cpp
|
|
||||||
struct Asteroid;
|
|
||||||
struct Spaceship;
|
|
||||||
|
|
||||||
struct SpaceObject {
|
|
||||||
virtual ~SpaceObject() = default;
|
|
||||||
|
|
||||||
// Only used to kick-start the double-dispatch process
|
|
||||||
virtual void collide_with(SpaceObject& other) = 0;
|
|
||||||
|
|
||||||
// The actual dispatching methods
|
|
||||||
virtual void collide_with(Asteroid& other) = 0;
|
|
||||||
virtual void collide_with(Spaceship& other) = 0;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct Asteroid : SpaceObject {
|
|
||||||
void collide_with(SpaceObject& other) override {
|
|
||||||
// `*this` is an `Asteroid&` which kick-starts the double-dispatch
|
|
||||||
other.collide_with(*this);
|
|
||||||
};
|
|
||||||
|
|
||||||
void collide_with(Asteroid& other) override { /* Asteroid/Asteroid */ };
|
|
||||||
void collide_with(Spaceship& other) override { /* Asteroid/Spaceship */ };
|
|
||||||
};
|
|
||||||
|
|
||||||
struct Spaceship : SpaceObject {
|
|
||||||
void collide_with(SpaceObject& other) override {
|
|
||||||
// `*this` is a `Spaceship&` which kick-starts the double-dispatch
|
|
||||||
other.collide_with(*this);
|
|
||||||
};
|
|
||||||
|
|
||||||
void collide_with(Asteroid& other) override { /* Spaceship/Asteroid */ };
|
|
||||||
void collide_with(Spaceship& other) override { /* Spaceship/Spaceship */ };
|
|
||||||
};
|
|
||||||
|
|
||||||
void collide(SpaceObject& first, SpaceObject& second) {
|
|
||||||
first.collide_with(second);
|
|
||||||
};
|
|
||||||
|
|
||||||
int main() {
|
|
||||||
auto asteroid = std::make_unique<Asteroid>();
|
|
||||||
auto spaceship = std::make_unique<Spaceship>();
|
|
||||||
|
|
||||||
collide(*asteroid, *spaceship);
|
|
||||||
// Calls in order:
|
|
||||||
// - Asteroid::collide_with(SpaceObject&)
|
|
||||||
// - Spaceship::collide_with(Asteroid&)
|
|
||||||
|
|
||||||
collide(*spaceship, *asteroid);
|
|
||||||
// Calls in order:
|
|
||||||
// - Spaceship::collide_with(SpaceObject&)
|
|
||||||
// - Asteroid::collide_with(Spaceship&)
|
|
||||||
|
|
||||||
asteroid->collide_with(*spaceship);
|
|
||||||
// Only calls Asteroid::collide_with(Spaceship&)
|
|
||||||
|
|
||||||
spaceship->collide_with(*asteroid);
|
|
||||||
// Only calls Spaceship::collide_with(Asteroid&)
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
Double dispatch is most commonly used with the *visitor pattern*, in
|
|
||||||
which a closed class hierarchy (the data) is separated from an open class
|
|
||||||
hierarchy (the algorithms acting on that data). This is especially useful in
|
|
||||||
e.g: compilers, where the AST class hierarchy represents the data *only*, and
|
|
||||||
all compiler stages and optimization passes are programmed by a series of
|
|
||||||
visitors.
|
|
||||||
|
|
||||||
One downside of this approach is that if you want to add `SpaceStation` as
|
|
||||||
a sub-class of `SpaceObject`, and handle its collisions with other
|
|
||||||
`SpaceObject`s, you need to:
|
|
||||||
|
|
||||||
* Implement all `collide_with` methods for this new class.
|
|
||||||
* Add a new virtual method `collide_with(SpaceStation&)` and implement it on
|
|
||||||
every sub-class.
|
|
||||||
|
|
||||||
This can be inconvenient if your class hierarchy changes often.
|
|
||||||
|
|
||||||
## Multiple dispatch on a closed class hierarchy
|
|
||||||
|
|
||||||
When even double dispatch is not enough, there is a way to do multiple dispatch
|
|
||||||
in standard C++, included in the STL since C++17. However unlike the previous
|
|
||||||
methods I showed, this one relies on using [`std::variant`][variant-cppref] and
|
|
||||||
[`std::visit`][visit-cppref].
|
|
||||||
|
|
||||||
[variant-cppref]: https://en.cppreference.com/w/cpp/utility/variant
|
|
||||||
[visit-cppref]: https://en.cppreference.com/w/cpp/utility/variant/visit
|
|
||||||
|
|
||||||
The limitation of `std::variant` is that you are limited to the types you can
|
|
||||||
select at *compile-time* for the values used during your dispatch operation.
|
|
||||||
You have a *closed* hierarchy of classes, which is the explicit list of types in
|
|
||||||
your `variant`.
|
|
||||||
|
|
||||||
Nonetheless, if you can live with that limitation, then you have a great amount
|
|
||||||
of power available to you. I have used `std::visit` in the past to mimic the
|
|
||||||
effect of pattern matching.
|
|
||||||
|
|
||||||
In this example, I re-create the double-dispatch from the previous section:
|
|
||||||
|
|
||||||
```cpp
|
|
||||||
// No need to inherit from a `SpaceObject` base class
|
|
||||||
struct Asteroid {};
|
|
||||||
struct Spaceship {};
|
|
||||||
|
|
||||||
// But the list of possible runtime types *must* be enumerated at compile-time
|
|
||||||
using SpaceObject = std::variant<Asteroid, Spaceship>;
|
|
||||||
|
|
||||||
void collide(SpaceObject& first, SpaceObject& second) {
|
|
||||||
struct CollideDispatch {
|
|
||||||
void operator()(Asteroid& first, Asteroid& second) {
|
|
||||||
// Asteroid/Asteroid
|
|
||||||
}
|
|
||||||
void operator()(Asteroid& first, Spaceship& second) {
|
|
||||||
// Asteroid/Spaceship
|
|
||||||
}
|
|
||||||
void operator()(Spaceship& first, Asteroid& second) {
|
|
||||||
// Spaceship/Asteroid
|
|
||||||
}
|
|
||||||
void operator()(Spaceship& first, Spaceship& second) {
|
|
||||||
// Spaceship/Spaceship
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
std::visit(CollideDispatch(), first, second);
|
|
||||||
}
|
|
||||||
|
|
||||||
int main() {
|
|
||||||
SpaceObject asteroid = Asteroid();
|
|
||||||
SpaceObject spaceship = Spaceship();
|
|
||||||
|
|
||||||
collide(asteroid, spaceship);
|
|
||||||
// Calls CollideDispatch::operator()(Asteroid&, Spaceship&)
|
|
||||||
|
|
||||||
collide(spaceship, asteroid);
|
|
||||||
// Calls CollideDispatch::operator()(Spaceship&, Asteroid&)
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
Obviously, the issue with adding a new `SpaceStation` variant is once again
|
|
||||||
apparent in this implementation. You will get a compile error unless you handle
|
|
||||||
this new `SpaceStation` variant at every point you `visit` the `SpaceObject`s.
|
|
||||||
|
|
||||||
## The Expression Problem
|
|
||||||
|
|
||||||
One issue we have not been able to move past in these examples is the
|
|
||||||
[Expression Problem][expression-problem]. In short, this means that we can't
|
|
||||||
add a new data type (e.g: `SpaceStation`), or a new operation (e.g: `land_on`)
|
|
||||||
to our current code without re-compiling it.
|
|
||||||
|
|
||||||
[expression-problem]: https://en.wikipedia.org/wiki/Expression_problem
|
|
||||||
|
|
||||||
This is the downside I was pointing out in our previous sections:
|
|
||||||
|
|
||||||
* Data type extension: one can easily add a new `SpaceObject` child-class in the
|
|
||||||
OOP version, but needs to modify each implementation if we want to add a new
|
|
||||||
method to the `SpaceObject` interface to implement a new operation.
|
|
||||||
* Operation extension: one can easily create a new function when using the
|
|
||||||
`std::variant` based representation, as pattern-matching easily allows us to
|
|
||||||
only handle the kinds of values we are interested in. But adding a new
|
|
||||||
`SpaceObject` variant means we need to modify and re-compile every
|
|
||||||
`std::visit` call to handle the new variant.
|
|
||||||
|
|
||||||
There is currently no (good) way in standard C++ to tackle the Expression
|
|
||||||
Problem. A paper ([N2216][N2216]) was written to propose a new language feature
|
|
||||||
to improve the situation. However it looks quite complex, and never got followed
|
|
||||||
up on for standardization.
|
|
||||||
|
|
||||||
[N2216]: https://open-std.org/jtc1/sc22/wg21/docs/papers/2007/n2216.pdf
|
|
||||||
|
|
||||||
In the meantime, one can find some libraries (like [`yomm2`][yomm2]) that
|
|
||||||
reduce the amount of boiler-plate needed to emulate this feature.
|
|
||||||
|
|
||||||
[yomm2]: https://github.com/jll63/yomm2
|
|
||||||
|
|
||||||
```cpp
|
|
||||||
#include <yorel/yomm2/keywords.hpp>
|
|
||||||
|
|
||||||
struct SpaceObject {
|
|
||||||
virtual ~SpaceObject() = default;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct Asteroid : SpaceObject { /* fields, methods, etc... */ };
|
|
||||||
|
|
||||||
struct Spaceship : SpaceObject { /* fields, methods, etc... */ };
|
|
||||||
|
|
||||||
// Register all sub-classes of `SpaceObject` for use with open methods
|
|
||||||
register_classes(SpaceObject, Asteroid, Spaceship);
|
|
||||||
|
|
||||||
// Register the `collide` open method, which dispatches on two arguments
|
|
||||||
declare_method(void, collide, (virtual_<SpaceObject&>, virtual_<SpaceObject&>));
|
|
||||||
|
|
||||||
// Write the different implementations of `collide`
|
|
||||||
define_method(void, collide, (Asteroid& left, Asteroid& right)) { /* work */ }
|
|
||||||
define_method(void, collide, (Asteroid& left, Spaceship& right)) { /* work */ }
|
|
||||||
define_method(void, collide, (Spaceship& left, Asteroid& right)) { /* work */ }
|
|
||||||
define_method(void, collide, (Spaceship& left, Spaceship& right)) { /* work */ }
|
|
||||||
|
|
||||||
|
|
||||||
int main() {
|
|
||||||
yorel::yomm2::update_methods();
|
|
||||||
|
|
||||||
auto asteroid = std::make_unique<Asteroid>();
|
|
||||||
auto spaceship = std::make_unique<Spaceship>();
|
|
||||||
|
|
||||||
collide(*asteroid, *spaceship); // Calls (Asteroid, Spaceship) version
|
|
||||||
collide(*spaceship, *asteroid); // Calls (Spaceship, Asteroid) version
|
|
||||||
collide(*asteroid, *asteroid); // Calls (Asteroid, Asteroid) version
|
|
||||||
collide(*spaceship, *spaceship); // Calls (Spaceship, Spaceship) version
|
|
||||||
}
|
|
||||||
```
|
|
|
@ -1,157 +0,0 @@
|
||||||
---
|
|
||||||
title: "Union Find"
|
|
||||||
date: 2024-06-24T21:07:49+01:00
|
|
||||||
draft: false # I don't care for draft mode, git has branches for that
|
|
||||||
description: "My favorite data structure"
|
|
||||||
tags:
|
|
||||||
- algorithms
|
|
||||||
- data structures
|
|
||||||
- python
|
|
||||||
categories:
|
|
||||||
- programming
|
|
||||||
series:
|
|
||||||
- Cool algorithms
|
|
||||||
favorite: false
|
|
||||||
disable_feed: false
|
|
||||||
---
|
|
||||||
|
|
||||||
To kick off the [series]({{< ref "/series/cool-algorithms/" >}}) of posts about
|
|
||||||
algorithms and data structures I find interesting, I will be talking about my
|
|
||||||
favorite one: the [_Disjoint Set_][wiki]. Also known as the _Union-Find_ data
|
|
||||||
structure, so named because of its two main operations: `ds.union(lhs, rhs)` and
|
|
||||||
`ds.find(elem)`.
|
|
||||||
|
|
||||||
[wiki]: https://en.wikipedia.org/wiki/Disjoint-set_data_structure
|
|
||||||
|
|
||||||
<!--more-->
|
|
||||||
|
|
||||||
## What does it do?
|
|
||||||
|
|
||||||
The _Union-Find_ data structure allows one to store a collection of sets of
|
|
||||||
elements, with operations for adding new sets, merging two sets into one, and
|
|
||||||
finding the representative member of a set. Not only does it do all that, but it
|
|
||||||
does it in almost constant (amortized) time!
|
|
||||||
|
|
||||||
Here is a small motivating example for using the _Disjoint Set_ data structure:
|
|
||||||
|
|
||||||
```python
|
|
||||||
def connected_components(graph: Graph) -> list[set[Node]]:
|
|
||||||
# Initialize the disjoint set so that each node is in its own set
|
|
||||||
ds: DisjointSet[Node] = DisjointSet(graph.nodes)
|
|
||||||
# Each edge is a connection, merge both sides into the same set
|
|
||||||
for (start, dest) in graph.edges:
|
|
||||||
ds.union(start, dest)
|
|
||||||
# Connected components share the same (arbitrary) root
|
|
||||||
components: dict[Node, set[Node]] = defaultdict(set)
|
|
||||||
for n in graph.nodes:
|
|
||||||
components[ds.find(n)].add(n)
|
|
||||||
# Return a list of disjoint sets corresponding to each connected component
|
|
||||||
return list(components.values())
|
|
||||||
```
|
|
||||||
|
|
||||||
## Implementation
|
|
||||||
|
|
||||||
I will show how to implement `UnionFind` for integers, though it can easily be
|
|
||||||
extended to be used with arbitrary types (e.g: by mapping each element
|
|
||||||
one-to-one to a distinct integer, or using a different set representation).
|
|
||||||
|
|
||||||
### Representation
|
|
||||||
|
|
||||||
Creating a new disjoint set is easy enough:
|
|
||||||
|
|
||||||
```python
|
|
||||||
class UnionFind:
|
|
||||||
_parent: list[int]
|
|
||||||
_rank: list[int]
|
|
||||||
|
|
||||||
def __init__(self, size: int):
|
|
||||||
# Each node is in its own set, making it its own parent...
|
|
||||||
self._parent = list(range(size))
|
|
||||||
# ... And its rank 0
|
|
||||||
self._rank = [0] * size
|
|
||||||
```
|
|
||||||
|
|
||||||
We represent each set through the `_parent` field: each element of the set is
|
|
||||||
linked to its parent, until the root node which is its own parent. When first
|
|
||||||
initializing the structure, each element is in its own set, so we initialize
|
|
||||||
each element to be a root and make it its own parent (`_parent[i] == i` for all
|
|
||||||
`i`).
|
|
||||||
|
|
||||||
The `_rank` field is an optimization which we will touch on in a later section.
|
|
||||||
|
|
||||||
### Find
|
|
||||||
|
|
||||||
A naive implementation of `find(...)` is simple enough to write:
|
|
||||||
|
|
||||||
```python
|
|
||||||
def find(self, elem: int) -> int:
|
|
||||||
# If `elem` is its own parent, then it is the root of the tree
|
|
||||||
if (parent := self._parent[elem]) == elem:
|
|
||||||
return elem
|
|
||||||
# Otherwise, recurse on the parent
|
|
||||||
return self.find(parent)
|
|
||||||
```
|
|
||||||
|
|
||||||
However, going back up the chain of parents each time we want to find the root
|
|
||||||
node (an `O(n)` operation) would make for disastrous performance. Instead we can
|
|
||||||
do a small optimization called _path splitting_.
|
|
||||||
|
|
||||||
```python
|
|
||||||
def find(self, elem: int) -> int:
|
|
||||||
while (parent := self._parent[elem]) != elem:
|
|
||||||
# Replace each parent link by a link to the grand-parent
|
|
||||||
elem, self._parent[elem] = parent, self._parent[parent]
|
|
||||||
return elem
|
|
||||||
```
|
|
||||||
|
|
||||||
This flattens the chain so that each node links more directly to the root (the
|
|
||||||
length is reduced by half), making each subsequent `find(...)` faster.
|
|
||||||
|
|
||||||
Other compression schemes exist, along the spectrum between shortening
|
|
||||||
the chain faster earlier, or updating `_parent` fewer times per `find(...)`.
|
|
||||||
|
|
||||||
### Union
|
|
||||||
|
|
||||||
A naive implementation of `union(...)` is simple enough to write:
|
|
||||||
|
|
||||||
```python
|
|
||||||
def union(self, lhs: int, rhs: int) -> int:
|
|
||||||
# Replace both elements by their root parent
|
|
||||||
lhs = self.find(lhs)
|
|
||||||
rhs = self.find(rhs)
|
|
||||||
# arbitrarily merge one into the other
|
|
||||||
self._parent[rhs] = lhs
|
|
||||||
# Return the new root
|
|
||||||
return lhs
|
|
||||||
```
|
|
||||||
|
|
||||||
Once again, improvements can be made. Depending on the order in which we call
|
|
||||||
`union(...)`, we might end up creating a long chain from the leaf of the tree to
|
|
||||||
the root node, leading to slower `find(...)` operations. If at all possible, we
|
|
||||||
would like to keep the trees as shallow as possible.
|
|
||||||
|
|
||||||
To do so, we want to avoid merging taller trees into smaller ones, so as to keep
|
|
||||||
them as balanced as possible. Since a higher tree will result in a slower
|
|
||||||
`find(...)`, keeping the trees balanced will lead to increased performance.
|
|
||||||
|
|
||||||
This is where the `_rank` field we mentioned earlier comes in: the _rank_ of an
|
|
||||||
element is an upper bound on its height in the tree. By keeping track of this
|
|
||||||
_approximate_ height, we can keep the trees balanced when merging them.
|
|
||||||
|
|
||||||
```python
|
|
||||||
def union(self, lhs: int, rhs: int) -> int:
|
|
||||||
lhs = self.find(lhs)
|
|
||||||
rhs = self.find(rhs)
|
|
||||||
# Bail out early if they already belong to the same set
|
|
||||||
if lhs == rhs:
|
|
||||||
return lhs
|
|
||||||
# Always keep `lhs` as the taller tree
|
|
||||||
if self._rank[lhs] < self._rank[rhs]:
|
|
||||||
lhs, rhs = rhs, lhs
|
|
||||||
# Merge the smaller tree into the taller one
|
|
||||||
self._parent[rhs] = lhs
|
|
||||||
# Update the rank when merging trees of approximately the same size
|
|
||||||
if self._rank[lhs] == self._rank[rhs]:
|
|
||||||
self._rank[lhs] += 1
|
|
||||||
return lhs
|
|
||||||
```
|
|
|
@ -1,171 +0,0 @@
|
||||||
---
|
|
||||||
title: "Trie"
|
|
||||||
date: 2024-06-30T11:07:49+01:00
|
|
||||||
draft: false # I don't care for draft mode, git has branches for that
|
|
||||||
description: "A cool map"
|
|
||||||
tags:
|
|
||||||
- algorithms
|
|
||||||
- data structures
|
|
||||||
- python
|
|
||||||
categories:
|
|
||||||
- programming
|
|
||||||
series:
|
|
||||||
- Cool algorithms
|
|
||||||
favorite: false
|
|
||||||
disable_feed: false
|
|
||||||
---
|
|
||||||
|
|
||||||
This time, let's talk about the [_Trie_][wiki], which is a tree-based mapping
|
|
||||||
structure most often used for string keys.
|
|
||||||
|
|
||||||
[wiki]: https://en.wikipedia.org/wiki/Trie
|
|
||||||
|
|
||||||
<!--more-->
|
|
||||||
|
|
||||||
## What does it do?
|
|
||||||
|
|
||||||
A _Trie_ can be used to map a set of string keys to their corresponding values,
|
|
||||||
without the need for a hash function. This also means you won't suffer from hash
|
|
||||||
collisions, though the tree-based structure will probably translate to slower
|
|
||||||
performance than a good hash table.
|
|
||||||
|
|
||||||
A _Trie_ is especially useful to represent a dictionary of words in the case of
|
|
||||||
spell correction, as it can easily be used to fuzzy match words under a given
|
|
||||||
edit distance (think [Levenshtein distance]).
|
|
||||||
|
|
||||||
[Levenshtein distance]: https://en.wikipedia.org/wiki/Levenshtein_distance
|
|
||||||
|
|
||||||
## Implementation
|
|
||||||
|
|
||||||
This implementation will be in Python for exposition purposes, even though
|
|
||||||
it already has a built-in `dict`.
|
|
||||||
|
|
||||||
### Representation
|
|
||||||
|
|
||||||
Creating a new `Trie` is easy: the root node starts off empty and without any
|
|
||||||
mapped values.
|
|
||||||
|
|
||||||
```python
|
|
||||||
class Trie[T]:
|
|
||||||
_children: dict[str, Trie[T]]
|
|
||||||
_value: T | None
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
# Each letter is mapped to a Trie
|
|
||||||
self._children = defaultdict(Trie)
|
|
||||||
# If we match a full string, we store the mapped value
|
|
||||||
self._value = None
|
|
||||||
```
|
|
||||||
|
|
||||||
We're using a `defaultdict` for the children for ease of implementation in this
|
|
||||||
post. In reality, I would encourage you to exit early when you can't match a given
|
|
||||||
character.
|
|
||||||
|
|
||||||
The string key will be implied by the position of a node in the tree: the empty
|
|
||||||
string at the root, one-character strings as its direct children, etc...
|
|
||||||
|
|
||||||
### Search
|
|
||||||
|
|
||||||
An exact match look-up is easily done: we go down the tree until we've exhausted
|
|
||||||
the key. At that point we've either found a mapped value or not.
|
|
||||||
|
|
||||||
```python
|
|
||||||
def get(self, key: str) -> T | None:
|
|
||||||
# Have we matched the full key?
|
|
||||||
if not key:
|
|
||||||
# Return the `T` if mapped, `None` otherwise
|
|
||||||
return self._value
|
|
||||||
# Otherwise, recurse on the child corresponding to the first letter
|
|
||||||
return self._children[key[0]].get(key[1:])
|
|
||||||
```
|
|
||||||
|
|
||||||
### Insertion
|
|
||||||
|
|
||||||
Adding a new value to the _Trie_ is similar to a key lookup, only this time we
|
|
||||||
store the new value instead of returning it.
|
|
||||||
|
|
||||||
```python
|
|
||||||
def insert(self, key: str, value: T) -> bool:
|
|
||||||
# Have we matched the full key?
|
|
||||||
if not key:
|
|
||||||
# Check whether we're overwriting a previous mapping
|
|
||||||
was_mapped = self._value is not None
|
|
||||||
# Store the corresponding value
|
|
||||||
self._value = value
|
|
||||||
# Return whether we've performed an overwrite
|
|
||||||
return was_mapped
|
|
||||||
# Otherwise, recurse on the child corresponding to the first letter
|
|
||||||
return self._children[key[0]].insert(key[1:], value)
|
|
||||||
```
|
|
||||||
|
|
||||||
### Removal
|
|
||||||
|
|
||||||
Removal should also look familiar.
|
|
||||||
|
|
||||||
```python
|
|
||||||
def remove(self, key: str) -> bool:
|
|
||||||
# Have we matched the full key?
|
|
||||||
if not key:
|
|
||||||
was_mapped = self._value is not None
|
|
||||||
# Remove the value
|
|
||||||
self._value = None
|
|
||||||
# Return whether it was mapped
|
|
||||||
return was_mapped
|
|
||||||
# Otherwise, recurse on the child corresponding to the first letter
|
|
||||||
return self._children[key[0]].remove(key[1:])
|
|
||||||
```
|
|
||||||
|
|
||||||
### Fuzzy matching
|
|
||||||
|
|
||||||
Fuzzily matching a given word is where the real difficulty is: the key is to
|
|
||||||
realize we can use the prefix-tree nature of a _Trie_ to avoid doing wasteful
|
|
||||||
work.
|
|
||||||
|
|
||||||
By leveraging the prefix visit order of the tree, we can build an iterative
|
|
||||||
Levenshtein distance matrix, in much the same way one would do so in its
|
|
||||||
[Dynamic Programming] implementation (see the [Wagner-Fisher algorithm]).
|
|
||||||
|
|
||||||
[Dynamic Programming]: https://en.wikipedia.org/wiki/Dynamic_programming
|
|
||||||
[Wagner-Fisher algorithm]: https://en.wikipedia.org/wiki/Wagner%E2%80%93Fischer_algorithm
|
|
||||||
|
|
||||||
```python
|
|
||||||
class FuzzyResult[T](NamedTuple):
|
|
||||||
distance: int
|
|
||||||
key: str
|
|
||||||
value: T
|
|
||||||
|
|
||||||
|
|
||||||
def get_fuzzy(self, key: str, max_distance: int = 0) -> Iterator[FuzzyResult[T]]:
|
|
||||||
def helper(
|
|
||||||
current_word: str,
|
|
||||||
node: Trie[T],
|
|
||||||
previous_row: list[int],
|
|
||||||
) -> Iterator[FuzzyResult[T]]:
|
|
||||||
# Iterative Levenshtein
|
|
||||||
current_row = [previous_row[0] + 1]
|
|
||||||
current_char = current_word[-1]
|
|
||||||
for column, key_char in enumerate(key, start=1):
|
|
||||||
insertion = current_row[column - 1] + 1
|
|
||||||
deletion = previous_row[column] + 1
|
|
||||||
replacement = previous_row[column - 1] + (key_char != current_char)
|
|
||||||
current_row.append(min(insertion, deletion, replacement))
|
|
||||||
|
|
||||||
# If we are under the max distance, match this node
|
|
||||||
if (distance := current_row[-1]) <= max_distance and node._value != None:
|
|
||||||
# Only if it has a value of course
|
|
||||||
yield FuzzyResult(distance, current_word, node._value)
|
|
||||||
|
|
||||||
# If we can potentially still match children, recurse
|
|
||||||
if min(current_row) <= max_distance:
|
|
||||||
for c, child in node._children.items():
|
|
||||||
yield from helper(current_word + c, child, current_row)
|
|
||||||
|
|
||||||
# Build the first row -- the edit distance from the empty string
|
|
||||||
row = list(range(len(key) + 1))
|
|
||||||
|
|
||||||
# Base case for the empty string
|
|
||||||
if (distance := row[-1]) <= max_distance and self._value != None:
|
|
||||||
yield FuzzyResult(distance, "", self._value)
|
|
||||||
for c, child in self._children.items():
|
|
||||||
yield from helper(c, child, row)
|
|
||||||
```
|
|
|
@ -1,191 +0,0 @@
|
||||||
---
|
|
||||||
title: "Gap Buffer"
|
|
||||||
date: 2024-07-06T21:27:19+01:00
|
|
||||||
draft: false # I don't care for draft mode, git has branches for that
|
|
||||||
description: "As featured in GNU Emacs"
|
|
||||||
tags:
|
|
||||||
- algorithms
|
|
||||||
- data structures
|
|
||||||
- python
|
|
||||||
categories:
|
|
||||||
- programming
|
|
||||||
series:
|
|
||||||
- Cool algorithms
|
|
||||||
favorite: false
|
|
||||||
disable_feed: false
|
|
||||||
---
|
|
||||||
|
|
||||||
The [_Gap Buffer_][wiki] is a popular data structure for text editors to
|
|
||||||
represent files and editable buffers. The most famous of them probably being
|
|
||||||
[GNU Emacs][emacs].
|
|
||||||
|
|
||||||
[wiki]: https://en.wikipedia.org/wiki/Gap_buffer
|
|
||||||
[emacs]: https://www.gnu.org/software/emacs/manual/html_node/elisp/Buffer-Gap.html
|
|
||||||
|
|
||||||
<!--more-->
|
|
||||||
|
|
||||||
## What does it do?
|
|
||||||
|
|
||||||
A _Gap Buffer_ is simply a list of characters, similar to a normal string, with
|
|
||||||
the added twist of splitting it into two sides: the prefix and suffix, on either
|
|
||||||
side of the cursor. In between them, a gap is left to allow for quick
|
|
||||||
insertion at the cursor.
|
|
||||||
|
|
||||||
Moving the cursor moves the gap around the buffer, the prefix and suffix getting
|
|
||||||
shorter/longer as required.
|
|
||||||
|
|
||||||
## Implementation
|
|
||||||
|
|
||||||
I'll be writing a sample implementation in Python, as with the rest of the
|
|
||||||
[series]({{< ref "/series/cool-algorithms/" >}}). I don't think it showcases the
|
|
||||||
elegance of the _Gap Buffer_ in action like a C implementation full of
|
|
||||||
`memmove`s would, but it does make it short and sweet.
|
|
||||||
|
|
||||||
### Representation
|
|
||||||
|
|
||||||
We'll be representing the gap buffer as an actual list of characters.
|
|
||||||
|
|
||||||
Given that Python doesn't _have_ characters, let's settle for a list of strings,
|
|
||||||
each representing a single character...
|
|
||||||
|
|
||||||
```python
|
|
||||||
Char = str
|
|
||||||
|
|
||||||
class GapBuffer:
|
|
||||||
# List of characters, contains prefix and suffix of string with gap in the middle
|
|
||||||
_buf: list[Char]
|
|
||||||
# The gap is contained between [start, end) (i.e: buf[start:end])
|
|
||||||
_gap_start: int
|
|
||||||
_gap_end: int
|
|
||||||
|
|
||||||
# Visual representation of the gap buffer:
|
|
||||||
# This is a very [ ]long string.
|
|
||||||
# |<----------------------------------------------->| capacity
|
|
||||||
# |<------------>| |<-------->| string
|
|
||||||
# |<------------------->| gap
|
|
||||||
# |<------------>| prefix
|
|
||||||
# |<-------->| suffix
|
|
||||||
def __init__(self, initial_capacity: int = 16) -> None:
|
|
||||||
assert initial_capacity > 0
|
|
||||||
# Initialize an empty gap buffer
|
|
||||||
self._buf = [""] * initial_capacity
|
|
||||||
self._gap_start = 0
|
|
||||||
self._gap_end = initial_capacity
|
|
||||||
```
|
|
||||||
|
|
||||||
### Accessors
|
|
||||||
|
|
||||||
I'm mostly adding these for exposition, and making it easier to write `assert`s
|
|
||||||
later.
|
|
||||||
|
|
||||||
```python
|
|
||||||
@property
|
|
||||||
def capacity(self) -> int:
|
|
||||||
return len(self._buf)
|
|
||||||
|
|
||||||
@property
|
|
||||||
def gap_length(self) -> int:
|
|
||||||
return self._gap_end - self._gap_start
|
|
||||||
|
|
||||||
@property
|
|
||||||
def string_length(self) -> int:
|
|
||||||
return self.capacity - self.gap_length
|
|
||||||
|
|
||||||
@property
|
|
||||||
def prefix_length(self) -> int:
|
|
||||||
return self._gap_start
|
|
||||||
|
|
||||||
@property
|
|
||||||
def suffix_length(self) -> int:
|
|
||||||
return self.capacity - self._gap_end
|
|
||||||
```
|
|
||||||
|
|
||||||
### Growing the buffer
|
|
||||||
|
|
||||||
I've written this method in a somewhat non-idiomatic manner, to make it closer
|
|
||||||
to how it would look in C using `realloc` instead.
|
|
||||||
|
|
||||||
It would be more efficient to use slicing to insert the needed extra capacity
|
|
||||||
directly, instead of making a new buffer and copying characters over.
|
|
||||||
|
|
||||||
```python
|
|
||||||
def grow(self, capacity: int) -> None:
|
|
||||||
assert capacity >= self.capacity
|
|
||||||
# Create a new buffer with the new capacity
|
|
||||||
new_buf = [""] * capacity
|
|
||||||
# Move the prefix/suffix to their place in the new buffer
|
|
||||||
added_capacity = capacity - len(self._buf)
|
|
||||||
new_buf[: self._gap_start] = self._buf[: self._gap_start]
|
|
||||||
new_buf[self._gap_end + added_capacity :] = self._buf[self._gap_end :]
|
|
||||||
# Use the new buffer, account for added capacity
|
|
||||||
self._buf = new_buf
|
|
||||||
self._gap_end += added_capacity
|
|
||||||
```
|
|
||||||
|
|
||||||
### Insertion
|
|
||||||
|
|
||||||
Inserting text at the cursor's position means filling up the gap in the middle
|
|
||||||
of the buffer. To do so we must first make sure that the gap is big enough, or
|
|
||||||
grow the buffer accordingly.
|
|
||||||
|
|
||||||
Then inserting the text is simply a matter of copying its characters in place,
|
|
||||||
and moving the start of the gap further right.
|
|
||||||
|
|
||||||
```python
|
|
||||||
def insert(self, val: str) -> None:
|
|
||||||
# Ensure we have enough space to insert the whole string
|
|
||||||
if len(val) > self.gap_length:
|
|
||||||
self.grow(max(self.capacity * 2, self.string_length + len(val)))
|
|
||||||
# Fill the gap with the given string
|
|
||||||
self._buf[self._gap_start : self._gap_start + len(val)] = val
|
|
||||||
self._gap_start += len(val)
|
|
||||||
```
|
|
||||||
|
|
||||||
### Deletion
|
|
||||||
|
|
||||||
Removing text from the buffer simply expands the gap in the corresponding
|
|
||||||
direction, shortening the string's prefix/suffix. This makes it very cheap.
|
|
||||||
|
|
||||||
The methods are named after the `backspace` and `delete` keys on the keyboard.
|
|
||||||
|
|
||||||
```python
|
|
||||||
def backspace(self, dist: int = 1) -> None:
|
|
||||||
assert dist <= self.prefix_length
|
|
||||||
# Extend gap to the left
|
|
||||||
self._gap_start -= dist
|
|
||||||
|
|
||||||
def delete(self, dist: int = 1) -> None:
|
|
||||||
assert dist <= self.suffix_length
|
|
||||||
# Extend gap to the right
|
|
||||||
self._gap_end += dist
|
|
||||||
```
|
|
||||||
|
|
||||||
### Moving the cursor
|
|
||||||
|
|
||||||
Moving the cursor along the buffer will shift letters from one side of the gap
|
|
||||||
to the other, moving them across from prefix to suffix and back.
|
|
||||||
|
|
||||||
I find Python's list slicing not quite as elegant to read as a `memmove`, though
|
|
||||||
it does make for a very small and efficient implementation.
|
|
||||||
|
|
||||||
```python
|
|
||||||
def left(self, dist: int = 1) -> None:
|
|
||||||
assert dist <= self.prefix_length
|
|
||||||
# Shift the needed number of characters from end of prefix to start of suffix
|
|
||||||
self._buf[self._gap_end - dist : self._gap_end] = self._buf[
|
|
||||||
self._gap_start - dist : self._gap_start
|
|
||||||
]
|
|
||||||
# Adjust indices accordingly
|
|
||||||
self._gap_start -= dist
|
|
||||||
self._gap_end -= dist
|
|
||||||
|
|
||||||
def right(self, dist: int = 1) -> None:
|
|
||||||
assert dist <= self.suffix_length
|
|
||||||
# Shift the needed number of characters from start of suffix to end of prefix
|
|
||||||
self._buf[self._gap_start : self._gap_start + dist] = self._buf[
|
|
||||||
self._gap_end : self._gap_end + dist
|
|
||||||
]
|
|
||||||
# Adjust indices accordingly
|
|
||||||
self._gap_start += dist
|
|
||||||
self._gap_end += dist
|
|
||||||
```
|
|
|
@ -1,97 +0,0 @@
|
||||||
---
|
|
||||||
title: "Bloom Filter"
|
|
||||||
date: 2024-07-14T17:46:40+01:00
|
|
||||||
draft: false # I don't care for draft mode, git has branches for that
|
|
||||||
description: "Probably cool"
|
|
||||||
tags:
|
|
||||||
- algorithms
|
|
||||||
- data structures
|
|
||||||
- python
|
|
||||||
categories:
|
|
||||||
- programming
|
|
||||||
series:
|
|
||||||
- Cool algorithms
|
|
||||||
favorite: false
|
|
||||||
disable_feed: false
|
|
||||||
---
|
|
||||||
|
|
||||||
The [_Bloom Filter_][wiki] is a probabilistic data structure for set membership.
|
|
||||||
|
|
||||||
The filter can be used as an inexpensive first step when querying the actual
|
|
||||||
data is quite costly (e.g: as a first check for expensive cache lookups or large
|
|
||||||
data seeks).
|
|
||||||
|
|
||||||
[wiki]: https://en.wikipedia.org/wiki/Bloom_filter
|
|
||||||
|
|
||||||
<!--more-->
|
|
||||||
|
|
||||||
## What does it do?
|
|
||||||
|
|
||||||
A _Bloom Filter_ can be understood as a hash-set which can either tell you:
|
|
||||||
|
|
||||||
* An element is _not_ part of the set.
|
|
||||||
* An element _may be_ part of the set.
|
|
||||||
|
|
||||||
More specifically, one can tweak the parameters of the filter to make it so that
|
|
||||||
the _false positive_ rate of membership is quite low.
|
|
||||||
|
|
||||||
I won't be going into those calculations here, but they are quite trivial to
|
|
||||||
compute, or one can just look up appropriate values for their use case.
|
|
||||||
|
|
||||||
## Implementation
|
|
||||||
|
|
||||||
I'll be using Python, which has the nifty ability of representing bitsets
|
|
||||||
through its built-in big integers quite easily.
|
|
||||||
|
|
||||||
We'll be assuming a `BIT_COUNT` of 64 here, but the implementation can easily be
|
|
||||||
tweaked to use a different number, or even change it at construction time.
|
|
||||||
|
|
||||||
### Representation
|
|
||||||
|
|
||||||
A `BloomFilter` is just a set of bits and a list of hash functions.
|
|
||||||
|
|
||||||
```python
|
|
||||||
BIT_COUNT = 64
|
|
||||||
|
|
||||||
class BloomFilter[T]:
|
|
||||||
_bits: int
|
|
||||||
_hash_functions: list[Callable[[T], int]]
|
|
||||||
|
|
||||||
def __init__(self, hash_functions: list[Callable[[T], int]]) -> None:
|
|
||||||
# Filter is initially empty
|
|
||||||
self._bits = 0
|
|
||||||
self._hash_functions = hash_functions
|
|
||||||
```
|
|
||||||
|
|
||||||
### Inserting a key
|
|
||||||
|
|
||||||
To add an element to the filter, we take the output from each hash function and
|
|
||||||
use that to set a bit in the filter. This combination of bits will identify the
|
|
||||||
element, which we can use for lookup later.
|
|
||||||
|
|
||||||
```python
|
|
||||||
def insert(self, val: T) -> None:
|
|
||||||
# Iterate over each hash
|
|
||||||
for f in self._hash_functions:
|
|
||||||
n = f(val) % BIT_COUNT
|
|
||||||
# Set the corresponding bit
|
|
||||||
self._bits |= 1 << n
|
|
||||||
```
|
|
||||||
|
|
||||||
### Querying a key
|
|
||||||
|
|
||||||
Because the _Bloom Filter_ does not actually store its elements, but some
|
|
||||||
derived data from hashing them, it can only definitely say if an element _does
|
|
||||||
not_ belong to it. Otherwise, it _may_ be part of the set, and should be checked
|
|
||||||
against the actual underlying store.
|
|
||||||
|
|
||||||
```python
|
|
||||||
def may_contain(self, val: T) -> bool:
|
|
||||||
for f in self._hash_functions:
|
|
||||||
n = f(val) % BIT_COUNT
|
|
||||||
# If one of the bits is unset, the value is definitely not present
|
|
||||||
if not (self._bits & (1 << n)):
|
|
||||||
return False
|
|
||||||
# All bits were matched, `val` is likely to be part of the set
|
|
||||||
return True
|
|
||||||
```
|
|
|
@ -1,159 +0,0 @@
|
||||||
---
|
|
||||||
title: "Treap"
|
|
||||||
date: 2024-07-20T14:12:27+01:00
|
|
||||||
draft: false # I don't care for draft mode, git has branches for that
|
|
||||||
description: "A simpler BST"
|
|
||||||
tags:
|
|
||||||
- algorithms
|
|
||||||
- data structures
|
|
||||||
- python
|
|
||||||
categories:
|
|
||||||
- programming
|
|
||||||
series:
|
|
||||||
- Cool algorithms
|
|
||||||
favorite: false
|
|
||||||
disable_feed: false
|
|
||||||
graphviz: true
|
|
||||||
---
|
|
||||||
|
|
||||||
The [_Treap_][wiki] is a mix between a _Binary Search Tree_ and a _Heap_.
|
|
||||||
|
|
||||||
Like a _Binary Search Tree_, it keeps an ordered set of keys in the shape of a
|
|
||||||
tree, allowing for binary search traversal.
|
|
||||||
|
|
||||||
Like a _Heap_, it associates each node with a priority, making sure that a
|
|
||||||
parent's priority is always higher than any of its children.
|
|
||||||
|
|
||||||
[wiki]: https://en.wikipedia.org/wiki/Treap
|
|
||||||
|
|
||||||
<!--more-->
|
|
||||||
|
|
||||||
## What does it do?
|
|
||||||
|
|
||||||
By randomizing the priority value of each key at insertion time, we ensure a
|
|
||||||
high likelihood that the tree stays _roughly_ balanced, avoiding degenerating to
|
|
||||||
unbalanced O(N) height.
|
|
||||||
|
|
||||||
Here's a sample tree created by inserting integers from 0 to 250 into the tree:
|
|
||||||
|
|
||||||
{{< graphviz file="treap.gv" />}}
|
|
||||||
|
|
||||||
## Implementation
|
|
||||||
|
|
||||||
I'll be keeping the theme for this [series] by using Python to implement the
|
|
||||||
_Treap_. This leads to somewhat annoying code to handle the rotation process,
|
|
||||||
which is easier to do in C using pointers.
|
|
||||||
|
|
||||||
[series]: {{< ref "/series/cool-algorithms/" >}}
|
|
||||||
|
|
||||||
### Representation
|
|
||||||
|
|
||||||
Creating a new `Treap` is easy: the tree starts off empty, waiting for new nodes
|
|
||||||
to insert.
|
|
||||||
|
|
||||||
Each `Node` must keep track of the `key`, the mapped `value`, and the node's
|
|
||||||
`priority` (which is assigned randomly). Finally it must also allow for storing
|
|
||||||
two children (`left` and `right`).
|
|
||||||
|
|
||||||
```python
|
|
||||||
class Node[K, V]:
|
|
||||||
key: K
|
|
||||||
value: V
|
|
||||||
priority: float
|
|
||||||
left: Node[K, V] | None
|
|
||||||
right: Node[K, V] | None
|
|
||||||
|
|
||||||
def __init__(self, key: K, value: V):
|
|
||||||
# Store key and value, like a normal BST node
|
|
||||||
self.key = key
|
|
||||||
self.value = value
|
|
||||||
# Priority is derived randomly
|
|
||||||
self.priority = random()
|
|
||||||
self.left = None
|
|
||||||
self.right = None
|
|
||||||
|
|
||||||
class Treap[K, V]:
|
|
||||||
_root: Node[K, V] | None
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
# The tree starts out empty
|
|
||||||
self._root = None
|
|
||||||
```
|
|
||||||
|
|
||||||
### Search
|
|
||||||
|
|
||||||
Searching the tree is the same as in any other _Binary Search Tree_.
|
|
||||||
|
|
||||||
```python
|
|
||||||
def get(self, key: K) -> V | None:
|
|
||||||
node = self._root
|
|
||||||
# The usual BST traversal
|
|
||||||
while node is not None:
|
|
||||||
if node.key == key:
|
|
||||||
return node.value
|
|
||||||
elif node.key < key:
|
|
||||||
node = node.right
|
|
||||||
else:
|
|
||||||
node = node.left
|
|
||||||
return None
|
|
||||||
```
|
|
||||||
|
|
||||||
### Insertion
|
|
||||||
|
|
||||||
To insert a new `key` into the tree, we identify which leaf position it should
|
|
||||||
be inserted at. We then generate the node's priority, insert it at this
|
|
||||||
position, and rotate the node upwards until the heap property is respected.
|
|
||||||
|
|
||||||
```python
|
|
||||||
type ChildField = Literal["left", "right"]
|
|
||||||
|
|
||||||
def insert(self, key: K, value: V) -> bool:
|
|
||||||
# Empty treap base-case
|
|
||||||
if self._root is None:
|
|
||||||
self._root = Node(key, value)
|
|
||||||
# Signal that we're not overwriting the value
|
|
||||||
return False
|
|
||||||
# Keep track of the parent chain for rotation after insertion
|
|
||||||
parents = []
|
|
||||||
node = self._root
|
|
||||||
while node is not None:
|
|
||||||
# Insert a pre-existing key
|
|
||||||
if node.key == key:
|
|
||||||
node.value = value
|
|
||||||
return True
|
|
||||||
# Go down the tree, keep track of the path through the tree
|
|
||||||
field = "left" if key < node.key else "right"
|
|
||||||
parents.append((node, field))
|
|
||||||
node = getattr(node, field)
|
|
||||||
# Key wasn't found, we're inserting a new node
|
|
||||||
child = Node(key, value)
|
|
||||||
parent, field = parents[-1]
|
|
||||||
setattr(parent, field, child)
|
|
||||||
# Rotate the new node up until we respect the decreasing priority property
|
|
||||||
self._rotate_up(child, parents)
|
|
||||||
# Key wasn't found, signal that we inserted a new node
|
|
||||||
return False
|
|
||||||
|
|
||||||
def _rotate_up(
|
|
||||||
self,
|
|
||||||
node: Node[K, V],
|
|
||||||
parents: list[tuple[Node[K, V], ChildField]],
|
|
||||||
) -> None:
|
|
||||||
while parents:
|
|
||||||
parent, field = parents.pop()
|
|
||||||
# If the parent has higher priority, we're done rotating
|
|
||||||
if parent.priority >= node.priority:
|
|
||||||
break
|
|
||||||
# Check for grand-parent/root of tree edge-case
|
|
||||||
if parents:
|
|
||||||
# Update grand-parent to point to the new rotated node
|
|
||||||
grand_parent, field = parents[-1]
|
|
||||||
setattr(grand_parent, field, node)
|
|
||||||
else:
|
|
||||||
# Point the root to the new rotated node
|
|
||||||
self._root = node
|
|
||||||
other_field = "left" if field == "right" else "right"
|
|
||||||
# Rotate the node up
|
|
||||||
setattr(parent, field, getattr(node, other_field))
|
|
||||||
setattr(node, other_field, parent)
|
|
||||||
```
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,146 +0,0 @@
|
||||||
---
|
|
||||||
title: "Treap, revisited"
|
|
||||||
date: 2024-07-27T14:12:27+01:00
|
|
||||||
draft: false # I don't care for draft mode, git has branches for that
|
|
||||||
description: "An even simpler BST"
|
|
||||||
tags:
|
|
||||||
- algorithms
|
|
||||||
- data structures
|
|
||||||
- python
|
|
||||||
categories:
|
|
||||||
- programming
|
|
||||||
series:
|
|
||||||
- Cool algorithms
|
|
||||||
favorite: false
|
|
||||||
disable_feed: false
|
|
||||||
---
|
|
||||||
|
|
||||||
My [last post]({{< relref "../2024-07-20-treap/index.md" >}}) about the _Treap_
|
|
||||||
showed an implementation using tree rotations, as is commonly done with [AVL
|
|
||||||
Trees][avl] and [Red Black Trees][rb].
|
|
||||||
|
|
||||||
But the _Treap_ lends itself well to a simple and elegant implementation with no
|
|
||||||
tree rotations. This makes it especially easy to implement the removal of a key,
|
|
||||||
rather than the fiddly process of deletion using tree rotations.
|
|
||||||
|
|
||||||
[avl]: https://en.wikipedia.org/wiki/AVL_tree
|
|
||||||
[rb]: https://en.wikipedia.org/wiki/Red%E2%80%93black_tree
|
|
||||||
|
|
||||||
<!--more-->
|
|
||||||
|
|
||||||
## Implementation
|
|
||||||
|
|
||||||
All operations on the tree will be implemented in terms of two fundamental
|
|
||||||
operations: `split` and `merge`.
|
|
||||||
|
|
||||||
We'll be reusing the same structures as in the last post, so let's skip straight
|
|
||||||
to implementing those fundamentals, and building on them for `insert` and
|
|
||||||
`delete`.
|
|
||||||
|
|
||||||
### Split
|
|
||||||
|
|
||||||
Splitting a tree means taking a key, and getting the following output:
|
|
||||||
|
|
||||||
* a `left` node, root of the tree of all keys lower than the input.
|
|
||||||
* an extracted `node` which corresponds to the input `key`.
|
|
||||||
* a `right` node, root of the tree of all keys higher than the input.
|
|
||||||
|
|
||||||
```python
|
|
||||||
type OptionalNode[K, V] = Node[K, V] | None
|
|
||||||
|
|
||||||
class SplitResult(NamedTuple):
|
|
||||||
left: OptionalNode
|
|
||||||
node: OptionalNode
|
|
||||||
right: OptionalNode
|
|
||||||
|
|
||||||
def split(root: OptionalNode[K, V], key: K) -> SplitResult:
|
|
||||||
# Base case, empty tree
|
|
||||||
if root is None:
|
|
||||||
return SplitResult(None, None, None)
|
|
||||||
# If we found the key, simply extract left and right
|
|
||||||
if root.key == key:
|
|
||||||
left, right = root.left, root.right
|
|
||||||
root.left, root.right = None, None
|
|
||||||
return SplitResult(left, root, right)
|
|
||||||
# Otherwise, recurse on the corresponding side of the tree
|
|
||||||
if root.key < key:
|
|
||||||
left, node, right = split(root.right, key)
|
|
||||||
root.right = left
|
|
||||||
return SplitResult(root, node, right)
|
|
||||||
if key < root.key:
|
|
||||||
left, node, right = split(root.left, key)
|
|
||||||
root.left = right
|
|
||||||
return SplitResult(left, node, root)
|
|
||||||
raise RuntimeError("Unreachable")
|
|
||||||
```
|
|
||||||
|
|
||||||
### Merge
|
|
||||||
|
|
||||||
Merging a `left` and `right` tree means (cheaply) building a new tree containing
|
|
||||||
both of them. A pre-condition for merging is that the `left` tree is composed
|
|
||||||
entirely of nodes that are lower than any key in `right` (i.e: as in `left` and
|
|
||||||
`right` after a `split`).
|
|
||||||
|
|
||||||
```python
|
|
||||||
def merge(
|
|
||||||
left: OptionalNode[K, V],
|
|
||||||
right: OptionalNode[K, V],
|
|
||||||
) -> OptionalNode[K, V]:
|
|
||||||
# Base cases, left or right being empty
|
|
||||||
if left is None:
|
|
||||||
return right
|
|
||||||
if right is None:
|
|
||||||
return left
|
|
||||||
# Left has higher priority, it must become the root node
|
|
||||||
if left.priority >= right.priority:
|
|
||||||
# We recursively reconstruct its right sub-tree
|
|
||||||
left.right = merge(left.right, right)
|
|
||||||
return left
|
|
||||||
# Right has higher priority, it must become the root node
|
|
||||||
if left.priority < right.priority:
|
|
||||||
# We recursively reconstruct its left sub-tree
|
|
||||||
right.left = merge(left, right.left)
|
|
||||||
return right
|
|
||||||
raise RuntimeError("Unreachable")
|
|
||||||
```
|
|
||||||
|
|
||||||
### Insertion
|
|
||||||
|
|
||||||
Inserting a node into the tree is done in two steps:
|
|
||||||
|
|
||||||
1. `split` the tree to isolate the middle insertion point
|
|
||||||
2. `merge` it back up to form a full tree with the inserted key
|
|
||||||
|
|
||||||
```python
|
|
||||||
def insert(self, key: K, value: V) -> bool:
|
|
||||||
# `left` and `right` come before/after the key
|
|
||||||
left, node, right = split(self._root, key)
|
|
||||||
was_updated: bool
|
|
||||||
# Create the node, or update its value, if the key was already in the tree
|
|
||||||
if node is None:
|
|
||||||
node = Node(key, value)
|
|
||||||
was_updated = False
|
|
||||||
else:
|
|
||||||
node.value = value
|
|
||||||
was_updated = True
|
|
||||||
# Rebuild the tree with a couple of merge operations
|
|
||||||
self._root = merge(left, merge(node, right))
|
|
||||||
# Signal whether the key was already in the tree
|
|
||||||
return was_updated
|
|
||||||
```
|
|
||||||
|
|
||||||
### Removal
|
|
||||||
|
|
||||||
Removing a key from the tree is similar to inserting a new key, and forgetting
|
|
||||||
to insert it back: simply `split` the tree and `merge` it back without the
|
|
||||||
extracted middle node.
|
|
||||||
|
|
||||||
```python
|
|
||||||
def remove(self, key: K) -> bool:
|
|
||||||
# `node` contains the key, or `None` if the key wasn't in the tree
|
|
||||||
left, node, right = split(self._root, key)
|
|
||||||
# Put the tree back together, without the extracted node
|
|
||||||
self._root = merge(left, right)
|
|
||||||
# Signal whether `key` was mapped in the tree
|
|
||||||
return node is not None
|
|
||||||
```
|
|
|
@ -1,145 +0,0 @@
|
||||||
---
|
|
||||||
title: "Reservoir Sampling"
|
|
||||||
date: 2024-08-02T18:30:56+01:00
|
|
||||||
draft: false # I don't care for draft mode, git has branches for that
|
|
||||||
description: "Elegantly sampling a stream"
|
|
||||||
tags:
|
|
||||||
- algorithms
|
|
||||||
- python
|
|
||||||
categories:
|
|
||||||
- programming
|
|
||||||
series:
|
|
||||||
- Cool algorithms
|
|
||||||
favorite: false
|
|
||||||
disable_feed: false
|
|
||||||
mathjax: true
|
|
||||||
---
|
|
||||||
|
|
||||||
[_Reservoir Sampling_][reservoir] is an [online][online], probabilistic
|
|
||||||
algorithm to uniformly sample $k$ random elements out of a stream of values.
|
|
||||||
|
|
||||||
It's a particularly elegant and small algorithm, only requiring $\Theta(k)$
|
|
||||||
amount of space and a single pass through the stream.
|
|
||||||
|
|
||||||
[reservoir]: https://en.wikipedia.org/wiki/Reservoir_sampling
|
|
||||||
[online]: https://en.wikipedia.org/wiki/Online_algorithm
|
|
||||||
|
|
||||||
<!--more-->
|
|
||||||
|
|
||||||
## Sampling one element
|
|
||||||
|
|
||||||
As an introduction, we'll first focus on fairly sampling one element from the
|
|
||||||
stream.
|
|
||||||
|
|
||||||
```python
|
|
||||||
def sample_one[T](stream: Iterable[T]) -> T:
|
|
||||||
stream_iter = iter(stream)
|
|
||||||
# Sample the first element
|
|
||||||
res = next(stream_iter)
|
|
||||||
for i, val in enumerate(stream_iter, start=1):
|
|
||||||
j = random.randint(0, i)
|
|
||||||
# Replace the sampled element with probability 1/(i + 1)
|
|
||||||
if j == 0:
|
|
||||||
res = val
|
|
||||||
# Return the randomly sampled element
|
|
||||||
return res
|
|
||||||
```
|
|
||||||
|
|
||||||
### Proof
|
|
||||||
|
|
||||||
Let's now prove that this algorithm leads to a fair sampling of the stream.
|
|
||||||
|
|
||||||
We'll be doing proof by induction.
|
|
||||||
|
|
||||||
#### Hypothesis $H_N$
|
|
||||||
|
|
||||||
After iterating through the first $N$ items in the stream,
|
|
||||||
each of them has had an equal $\frac{1}{N}$ probability of being selected as
|
|
||||||
`res`.
|
|
||||||
|
|
||||||
#### Base Case $H_1$
|
|
||||||
|
|
||||||
We can trivially observe that the first element is always assigned to `res`,
|
|
||||||
$\frac{1}{1} = 1$, the hypothesis has been verified.
|
|
||||||
|
|
||||||
#### Inductive Case
|
|
||||||
|
|
||||||
For a given $N$, let us assume that $H_N$ holds. Let us now look at the events
|
|
||||||
of the loop iteration where `i = N` (i.e: observation of the $N + 1$-th item in the
|
|
||||||
stream).
|
|
||||||
|
|
||||||
`j = random.randint(0, i)` uniformly selects a value in the range $[0, i]$,
|
|
||||||
a.k.a $[0, N]$. We then have two cases:
|
|
||||||
|
|
||||||
* `j == 0`, with probability $\frac{1}{N + 1}$: we select `val` as the new
|
|
||||||
reservoir element `res`.
|
|
||||||
|
|
||||||
* `j != 0`, with probability $\frac{N}{N + 1}$: we keep the previous value of
|
|
||||||
`res`. By $H_N$, any of the first $N$ elements had a $\frac{1}{N}$ probability
|
|
||||||
of being `res` at the start of the loop, so each element now has a
|
|
||||||
probability $\frac{1}{N} \cdot \frac{N}{N + 1} = \frac{1}{N + 1}$ of being the
|
|
||||||
element.
|
|
||||||
|
|
||||||
And thus, we have proven $H_{N + 1}$ at the end of the loop.
|
|
||||||
|
|
||||||
## Sampling $k$ elements
|
|
||||||
|
|
||||||
The code for sampling $k$ elements is very similar to the one-element case.
|
|
||||||
|
|
||||||
```python
|
|
||||||
def sample[T](stream: Iterable[T], k: int = 1) -> list[T]:
|
|
||||||
stream_iter = iter(stream)
|
|
||||||
# Retain the first 'k' elements in the reservoir
|
|
||||||
res = list(itertools.islice(stream_iter, k))
|
|
||||||
for i, val in enumerate(stream_iter, start=k):
|
|
||||||
j = random.randint(0, i)
|
|
||||||
# Replace one element at random with probability k/(i + 1)
|
|
||||||
if j < k:
|
|
||||||
res[j] = val
|
|
||||||
# Return 'k' randomly sampled elements
|
|
||||||
return res
|
|
||||||
```
|
|
||||||
|
|
||||||
### Proof
|
|
||||||
|
|
||||||
Let us once again do a proof by induction, assuming the stream contains at least
|
|
||||||
$k$ items.
|
|
||||||
|
|
||||||
#### Hypothesis $H_N$
|
|
||||||
|
|
||||||
After iterating through the first $N$ items in the stream, each of them has had
|
|
||||||
an equal $\frac{k}{N}$ probability of being sampled from the stream.
|
|
||||||
|
|
||||||
#### Base Case $H_k$
|
|
||||||
|
|
||||||
We can trivially observe that the first $k$ elements are sampled at the start of
|
|
||||||
the algorithm, $\frac{k}{k} = 1$, the hypothesis has been verified.
|
|
||||||
|
|
||||||
#### Inductive Case
|
|
||||||
|
|
||||||
For a given $N$, let us assume that $H_N$ holds. Let us now look at the events
|
|
||||||
of the loop iteration where `i = N`, in order to prove $H_{N + 1}$.
|
|
||||||
|
|
||||||
`j = random.randint(0, i)` uniformly selects a value in the range $[0, i]$,
|
|
||||||
a.k.a $[0, N]$. We then have three cases:
|
|
||||||
|
|
||||||
* `j >= k`, with probability $1 - \frac{k}{N + 1}$: we do not modify the
|
|
||||||
sampled reservoir at all.
|
|
||||||
|
|
||||||
* `j < k`, with probability $\frac{k}{N + 1}$: we sample the new element to
|
|
||||||
replace the `j`-th element of the reservoir. Therefore for any element
|
|
||||||
$e \in [0, k[$ we can either have:
|
|
||||||
* $j = e$: the element _is_ replaced, probability $\frac{1}{k}$.
|
|
||||||
* $j \neq e$: the element is _not_ replaced, probability $\frac{k - 1}{k}$.
|
|
||||||
|
|
||||||
We can now compute the probability that a previously sampled element is kept in
|
|
||||||
the reservoir:
|
|
||||||
$1 - \frac{k}{N + 1} + \frac{k}{N + 1} \cdot \frac{k - 1}{k} = \frac{N}{N + 1}$.
|
|
||||||
|
|
||||||
By $H_N$, any of the first $N$ elements had a $\frac{k}{N}$ probability
|
|
||||||
of being sampled at the start of the loop, so each element now has a
|
|
||||||
probability $\frac{k}{N} \cdot \frac{N}{N + 1} = \frac{k}{N + 1}$ of being the
|
|
||||||
element.
|
|
||||||
|
|
||||||
We have now proven that all elements have a probability $\frac{k}{N + 1}$ of
|
|
||||||
being sampled at the end of the loop, therefore $H_{N + 1}$ has been verified.
|
|
|
@ -1,472 +0,0 @@
|
||||||
---
|
|
||||||
title: "k-d Tree"
|
|
||||||
date: 2024-08-10T11:50:33+01:00
|
|
||||||
draft: false # I don't care for draft mode, git has branches for that
|
|
||||||
description: "Points in spaaaaace!"
|
|
||||||
tags:
|
|
||||||
- algorithms
|
|
||||||
- data structures
|
|
||||||
- python
|
|
||||||
categories:
|
|
||||||
- programming
|
|
||||||
series:
|
|
||||||
- Cool algorithms
|
|
||||||
favorite: false
|
|
||||||
disable_feed: false
|
|
||||||
---
|
|
||||||
|
|
||||||
The [_k-d Tree_][wiki] is a useful way to map points in space and make them
|
|
||||||
efficient to query.
|
|
||||||
|
|
||||||
I ran into them during my studies in graphics, as they are one of the
|
|
||||||
possible acceleration structures for [ray-casting] operations.
|
|
||||||
|
|
||||||
[wiki]: https://en.wikipedia.org/wiki/K-d_tree
|
|
||||||
[ray-casting]: https://en.wikipedia.org/wiki/Ray_casting
|
|
||||||
|
|
||||||
<!--more-->
|
|
||||||
|
|
||||||
## Implementation
|
|
||||||
|
|
||||||
As usual, this will be in Python, though its lack of proper discriminated enums
|
|
||||||
makes it more verbose than would otherwise be necessary.
|
|
||||||
|
|
||||||
### Pre-requisites
|
|
||||||
|
|
||||||
Let's first define what kind of space our _k-d Tree_ is dealing with. In this
|
|
||||||
instance $k = 3$ just like in the normal world.
|
|
||||||
|
|
||||||
```python
|
|
||||||
class Point(NamedTuple):
|
|
||||||
x: float
|
|
||||||
y: float
|
|
||||||
z: float
|
|
||||||
|
|
||||||
class Axis(IntEnum):
|
|
||||||
X = 0
|
|
||||||
Y = 1
|
|
||||||
Z = 2
|
|
||||||
|
|
||||||
def next(self) -> Axis:
|
|
||||||
# Each level of the tree is split along a different axis
|
|
||||||
return Axis((self + 1) % 3)
|
|
||||||
```
|
|
||||||
|
|
||||||
### Representation
|
|
||||||
|
|
||||||
The tree is represented by `KdTree`, each of its leaf nodes is a `KdLeafNode`
|
|
||||||
and its inner nodes are `KdSplitNode`s.
|
|
||||||
|
|
||||||
For each point in space, the tree can also keep track of an associated value,
|
|
||||||
similar to a dictionary or other mapping data structure. Hence we will make our
|
|
||||||
`KdTree` generic to this mapped type `T`.
|
|
||||||
|
|
||||||
#### Leaf node
|
|
||||||
|
|
||||||
A leaf node contains a number of points that were added to the tree. For each
|
|
||||||
point, we also track their mapped value, hence the `dict[Point, T]`.
|
|
||||||
|
|
||||||
```python
|
|
||||||
class KdLeafNode[T]:
|
|
||||||
points: dict[Point, T]
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
self.points = {}
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Split node
|
|
||||||
|
|
||||||
An inner node must partition the space into two sub-spaces along a given axis
|
|
||||||
and mid-point (thus defining a plane). All points that are "to the left" of the
|
|
||||||
plane will be kept in one child, while all the points "to the right" will be in
|
|
||||||
the other. Similar to a [_Binary Search Tree_][bst]'s inner nodes.
|
|
||||||
|
|
||||||
[bst]: https://en.wikipedia.org/wiki/Binary_search_tree
|
|
||||||
|
|
||||||
```python
|
|
||||||
class KdSplitNode[T]:
|
|
||||||
axis: Axis
|
|
||||||
mid: float
|
|
||||||
children: tuple[KdNode[T], KdNode[T]]
|
|
||||||
|
|
||||||
# Convenience function to index into the child which contains `point`
|
|
||||||
def _index(self, point: Point) -> int:
|
|
||||||
return 0 if point[self.axis] <= self.mid else 1
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Tree
|
|
||||||
|
|
||||||
The tree itself is merely a wrapper around its inner nodes.
|
|
||||||
|
|
||||||
One annoying issue about writing this in Python is the lack of proper
|
|
||||||
discriminated enum types. So we need to create a wrapper type for the nodes
|
|
||||||
(`KdNode`) to allow for splitting when updating the tree.
|
|
||||||
|
|
||||||
```python
|
|
||||||
class KdNode[T]:
|
|
||||||
# Wrapper around leaf/inner nodes, the poor man's discriminated enum
|
|
||||||
inner: KdLeafNode[T] | KdSplitNode[T]
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
self.inner = KdLeafNode()
|
|
||||||
|
|
||||||
# Convenience constructor used when splitting a node
|
|
||||||
@classmethod
|
|
||||||
def from_items(cls, items: Iterable[tuple[Point, T]]) -> KdNode[T]:
|
|
||||||
res = cls()
|
|
||||||
res.inner.points.update(items)
|
|
||||||
return res
|
|
||||||
|
|
||||||
class KdTree[T]:
|
|
||||||
_root: KdNode[T]
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
# Tree starts out empty
|
|
||||||
self._root = KdNode()
|
|
||||||
```
|
|
||||||
|
|
||||||
### Inserting a point
|
|
||||||
|
|
||||||
To add a point to the tree, we simply recurse from node to node, similar to a
|
|
||||||
_BST_'s insertion algorithm. Once we've found the correct leaf node to insert
|
|
||||||
our point into, we simply do so.
|
|
||||||
|
|
||||||
If that leaf node goes over the maximum number of points it can store, we must
|
|
||||||
then split it along an axis, cycling between `X`, `Y`, and `Z` at each level of
|
|
||||||
the tree (i.e: splitting along the `X` axis on the first level, then `Y` on the
|
|
||||||
second, then `Z` after that, and then `X`, etc...).
|
|
||||||
|
|
||||||
```python
|
|
||||||
# How many points should be stored in a leaf node before being split
|
|
||||||
MAX_CAPACITY = 32
|
|
||||||
|
|
||||||
def median(values: Iterable[float]) -> float:
|
|
||||||
sorted_values = sorted(values)
|
|
||||||
mid_point = len(sorted_values) // 2
|
|
||||||
if len(sorted_values) % 2 == 1:
|
|
||||||
return sorted_values[mid_point]
|
|
||||||
a, b = sorted_values[mid_point - 1], sorted_values[mid_point]
|
|
||||||
return a + (b - a) / 2
|
|
||||||
|
|
||||||
def partition[T](
|
|
||||||
pred: Callable[[T], bool],
|
|
||||||
iterable: Iterable[T]
|
|
||||||
) -> tuple[list[T], list[T]]:
|
|
||||||
truths, falses = [], []
|
|
||||||
for v in iterable:
|
|
||||||
(truths if pred(v) else falses).append(v)
|
|
||||||
return truths, falses
|
|
||||||
|
|
||||||
def split_leaf[T](node: KdLeafNode[T], axis: Axis) -> KdSplitNode[T]:
|
|
||||||
# Find the median value for the given axis
|
|
||||||
mid = median(p[axis] for p in node.points)
|
|
||||||
# Split into left/right children according to the mid-point and axis
|
|
||||||
left, right = partition(lambda kv: kv[0][axis] <= mid, node.points.items())
|
|
||||||
return KdSplitNode(
|
|
||||||
axis,
|
|
||||||
mid,
|
|
||||||
(KdNode.from_items(left), KdNode.from_items(right)),
|
|
||||||
)
|
|
||||||
|
|
||||||
class KdTree[T]:
|
|
||||||
def insert(self, point: Point, val: T) -> bool:
|
|
||||||
# Forward to the root node, choose `X` as the first split axis
|
|
||||||
return self._root.insert(point, val, Axis.X)
|
|
||||||
|
|
||||||
class KdLeafNode[T]:
|
|
||||||
def insert(self, point: Point, val: T, split_axis: Axis) -> bool:
|
|
||||||
# Check whether we're overwriting a previous value
|
|
||||||
was_mapped = point in self.points
|
|
||||||
# Store the corresponding value
|
|
||||||
self.points[point] = val
|
|
||||||
# Return whether we've performed an overwrite
|
|
||||||
return was_mapped
|
|
||||||
|
|
||||||
class KdSplitNode[T]:
|
|
||||||
def insert(self, point: Point, val: T, split_axis: Axis) -> bool:
|
|
||||||
# Find the child which contains the point
|
|
||||||
child = self.children[self._index(point)]
|
|
||||||
# Recurse into it, choosing the next split axis
|
|
||||||
return child.insert(point, val, split_axis.next())
|
|
||||||
|
|
||||||
class KdNode[T]:
|
|
||||||
def insert(self, point: Point, val: T, split_axis: Axis) -> bool:
|
|
||||||
# Add the point to the wrapped node...
|
|
||||||
res = self.inner.insert(point, val, split_axis)
|
|
||||||
# ... And take care of splitting leaf nodes when necessary
|
|
||||||
if (
|
|
||||||
isinstance(self.inner, KdLeafNode)
|
|
||||||
and len(self.inner.points) > MAX_CAPACITY
|
|
||||||
):
|
|
||||||
self.inner = split_leaf(self.inner, split_axis)
|
|
||||||
return res
|
|
||||||
```
|
|
||||||
|
|
||||||
### Searching for a point
|
|
||||||
|
|
||||||
Looking for a given point in the tree looks very similar to a _BST_'s search,
|
|
||||||
each split node dividing the space into two sub-spaces, only one of which
|
|
||||||
contains the point.
|
|
||||||
|
|
||||||
```python
|
|
||||||
class KdTree[T]:
|
|
||||||
def lookup(self, point: Point) -> T | None:
|
|
||||||
# Forward to the root node
|
|
||||||
return self._root.lookup(point)
|
|
||||||
|
|
||||||
class KdNode[T]:
|
|
||||||
def lookup(self, point: Point) -> T | None:
|
|
||||||
# Forward to the wrapped node
|
|
||||||
return self.inner.lookup(point)
|
|
||||||
|
|
||||||
class KdLeafNode[T]:
|
|
||||||
def lookup(self, point: Point) -> T | None:
|
|
||||||
# Simply check whether we've stored the point in this leaf
|
|
||||||
return self.points.get(point)
|
|
||||||
|
|
||||||
class KdSplitNode[T]:
|
|
||||||
def lookup(self, point: Point) -> T | None:
|
|
||||||
# Recurse into the child which contains the point
|
|
||||||
return self.children[self._index(point)].lookup(point)
|
|
||||||
```
|
|
||||||
|
|
||||||
### Closest points
|
|
||||||
|
|
||||||
Now to look at the most interesting operation one can do on a _k-d Tree_:
|
|
||||||
querying for the objects which are closest to a given point (i.e: the [Nearest
|
|
||||||
neighbour search][nns]).
|
|
||||||
|
|
||||||
This is a more complicated algorithm, which will also need some modifications to
|
|
||||||
our current _k-d Tree_ implementation in order to track just a bit more information
|
|
||||||
about the points it contains.
|
|
||||||
|
|
||||||
[nns]: https://en.wikipedia.org/wiki/Nearest_neighbor_search
|
|
||||||
|
|
||||||
#### A notion of distance
|
|
||||||
|
|
||||||
To search for the closest points to a given origin, we first need to define
|
|
||||||
which [distance](https://en.wikipedia.org/wiki/Distance) we are using in our
|
|
||||||
space.
|
|
||||||
|
|
||||||
For this example, we'll simply be using the usual definition of [(Euclidean)
|
|
||||||
distance][euclidean-distance].
|
|
||||||
|
|
||||||
[euclidean-distance]: https://en.wikipedia.org/wiki/Euclidean_distance
|
|
||||||
|
|
||||||
```python
|
|
||||||
def dist(point: Point, other: Point) -> float:
|
|
||||||
return sqrt(sum((a - b) ** 2 for a, b in zip(point, other)))
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Tracking the tree's boundaries
|
|
||||||
|
|
||||||
To make the query efficient, we'll need to track the tree's boundaries: the
|
|
||||||
bounding box of all points contained therein. This will allow us to stop the
|
|
||||||
search early once we've found enough points and can be sure that the rest of the
|
|
||||||
tree is too far away to qualify.
|
|
||||||
|
|
||||||
For this, let's define the `AABB` (Axis-Aligned Bounding Box) class.
|
|
||||||
|
|
||||||
```python
|
|
||||||
class Point(NamedTuple):
|
|
||||||
# Convenience function to replace the coordinate along a given dimension
|
|
||||||
def replace(self, axis: Axis, new_coord: float) -> Point:
|
|
||||||
coords = list(self)
|
|
||||||
coords[axis] = new_coord
|
|
||||||
return Point(*coords)
|
|
||||||
|
|
||||||
class AABB(NamedTuple):
|
|
||||||
# Lowest coordinates in the box
|
|
||||||
low: Point
|
|
||||||
# Highest coordinates in the box
|
|
||||||
high: Point
|
|
||||||
|
|
||||||
# An empty box
|
|
||||||
@classmethod
|
|
||||||
def empty(cls) -> AABB:
|
|
||||||
return cls(
|
|
||||||
Point(*(float("inf"),) * 3),
|
|
||||||
Point(*(float("-inf"),) * 3),
|
|
||||||
)
|
|
||||||
|
|
||||||
# Split the box into two along a given axis for a given mid-point
|
|
||||||
def split(self, axis: Axis, mid: float) -> tuple[AABB, AABB]:
|
|
||||||
assert self.low[axis] <= mid <= self.high[axis]
|
|
||||||
return (
|
|
||||||
AABB(self.low, self.high.replace(axis, mid)),
|
|
||||||
AABB(self.low.replace(axis, mid), self.high),
|
|
||||||
)
|
|
||||||
|
|
||||||
# Extend a box to contain a given point
|
|
||||||
def extend(self, point: Point) -> AABB:
|
|
||||||
low = Point(*map(min, zip(self.low, point)))
|
|
||||||
high = Point(*map(max, zip(self.high, point)))
|
|
||||||
return AABB(low, high)
|
|
||||||
|
|
||||||
# Return the shortest distance between a given point and the box
|
|
||||||
def dist_to_point(self, point: Point) -> float:
|
|
||||||
deltas = (
|
|
||||||
max(self.low[axis] - point[axis], 0, point[axis] - self.high[axis])
|
|
||||||
for axis in Axis
|
|
||||||
)
|
|
||||||
return dist(Point(0, 0, 0), Point(*deltas))
|
|
||||||
```
|
|
||||||
|
|
||||||
And do the necessary modifications to the `KdTree` to store the bounding box and
|
|
||||||
update it as we add new points.
|
|
||||||
|
|
||||||
```python
|
|
||||||
class KdTree[T]:
|
|
||||||
_root: KdNode[T]
|
|
||||||
# New field: to keep track of the tree's boundaries
|
|
||||||
_aabb: AABB
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
self._root = KdNode()
|
|
||||||
# Initialize the empty tree with an empty bounding box
|
|
||||||
self._aabb = AABB.empty()
|
|
||||||
|
|
||||||
def insert(self, point: Point, val: T) -> bool:
|
|
||||||
# Extend the AABB for our k-d Tree when adding a point to it
|
|
||||||
self._aabb = self._aabb.extend(point)
|
|
||||||
return self._root.insert(point, val, Axis.X)
|
|
||||||
```
|
|
||||||
|
|
||||||
#### `MaxHeap`
|
|
||||||
|
|
||||||
Python's builtin [`heapq`][heapq] module provides the necessary functions to
|
|
||||||
create and interact with a [_Priority Queue_][priority-queue], in the form of a
|
|
||||||
[_Binary Heap_][binary-heap].
|
|
||||||
|
|
||||||
Unfortunately, Python's library maintains a _min-heap_, which keeps the minimum
|
|
||||||
element at the root. For this algorithm, we're interested in having a
|
|
||||||
_max-heap_, with the maximum at the root.
|
|
||||||
|
|
||||||
Thankfully, one can just reverse the comparison function for each element to
|
|
||||||
convert between the two. Let's write a `MaxHeap` class making use of this
|
|
||||||
library, with a `Reverse` wrapper class to reverse the order of elements
|
|
||||||
contained within it (similar to [Rust's `Reverse`][reverse]).
|
|
||||||
|
|
||||||
[binary-heap]: https://en.wikipedia.org/wiki/Binary_heap
|
|
||||||
[heapq]: https://docs.python.org/3/library/heapq.html
|
|
||||||
[priority-queue]: https://en.wikipedia.org/wiki/Priority_queue
|
|
||||||
[reverse]: https://doc.rust-lang.org/std/cmp/struct.Reverse.html
|
|
||||||
|
|
||||||
```python
|
|
||||||
# Reverses the wrapped value's ordering
|
|
||||||
@functools.total_ordering
|
|
||||||
class Reverse[T]:
|
|
||||||
value: T
|
|
||||||
|
|
||||||
def __init__(self, value: T):
|
|
||||||
self.value = value
|
|
||||||
|
|
||||||
def __lt__(self, other: Reverse[T]) -> bool:
|
|
||||||
return self.value > other.value
|
|
||||||
|
|
||||||
def __eq__(self, other: Reverse[T]) -> bool:
|
|
||||||
return self.value == other.value
|
|
||||||
|
|
||||||
class MaxHeap[T]:
|
|
||||||
_heap: list[Reverse[T]]
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
self._heap = []
|
|
||||||
|
|
||||||
def __len__(self) -> int:
|
|
||||||
return len(self._heap)
|
|
||||||
|
|
||||||
def __iter__(self) -> Iterator[T]:
|
|
||||||
yield from (item.value for item in self._heap)
|
|
||||||
|
|
||||||
# Push a value on the heap
|
|
||||||
def push(self, value: T) -> None:
|
|
||||||
heapq.heappush(self._heap, Reverse(value))
|
|
||||||
|
|
||||||
# Peek at the current maximum value
|
|
||||||
def peek(self) -> T:
|
|
||||||
return self._heap[0].value
|
|
||||||
|
|
||||||
# Pop and return the highest value
|
|
||||||
def pop(self) -> T:
|
|
||||||
return heapq.heappop(self._heap).value
|
|
||||||
|
|
||||||
# Pushes a value onto the heap, pops and returns the highest value
|
|
||||||
def pushpop(self, value: T) -> T:
|
|
||||||
return heapq.heappushpop(self._heap, Reverse(value)).value
|
|
||||||
```
|
|
||||||
|
|
||||||
#### The actual Implementation
|
|
||||||
|
|
||||||
Now that we have written the necessary building blocks, let's tackle the
|
|
||||||
implementation of `closest` for our _k-d Tree_.
|
|
||||||
|
|
||||||
```python
|
|
||||||
# Wrapper type for closest points, ordered by `distance`
|
|
||||||
@dataclasses.dataclass(order=True)
|
|
||||||
class ClosestPoint[T]:
|
|
||||||
point: Point = field(compare=False)
|
|
||||||
value: T = field(compare=False)
|
|
||||||
distance: float
|
|
||||||
|
|
||||||
class KdTree[T]:
|
|
||||||
def closest(self, point: Point, n: int = 1) -> list[ClosestPoint[T]]:
|
|
||||||
assert n > 0
|
|
||||||
# Create the output heap
|
|
||||||
res = MaxHeap()
|
|
||||||
# Recurse onto the root node
|
|
||||||
self._root.closest(point, res, n, self._aabb)
|
|
||||||
# Return the resulting list, from closest to farthest
|
|
||||||
return sorted(res)
|
|
||||||
|
|
||||||
class KdNode[T]:
|
|
||||||
def closest(
|
|
||||||
self,
|
|
||||||
point: Point,
|
|
||||||
out: MaxHeap[ClosestPoint[T]],
|
|
||||||
n: int,
|
|
||||||
bounds: AABB,
|
|
||||||
) -> None:
|
|
||||||
# Forward to the wrapped node
|
|
||||||
self.inner.closest(point, out, n, bounds)
|
|
||||||
|
|
||||||
class KdLeafNode[T]:
|
|
||||||
def closest(
|
|
||||||
self,
|
|
||||||
point: Point,
|
|
||||||
out: MaxHeap[ClosestPoint[T]],
|
|
||||||
n: int,
|
|
||||||
bounds: AABB,
|
|
||||||
) -> None:
|
|
||||||
# At the leaf, simply iterate over all points and add them to the heap
|
|
||||||
for p, val in self.points.items():
|
|
||||||
item = ClosestPoint(p, val, dist(p, point))
|
|
||||||
if len(out) < n:
|
|
||||||
# If the heap isn't full, just push
|
|
||||||
out.push(item)
|
|
||||||
elif out.peek().distance > item.distance:
|
|
||||||
# Otherwise, push and pop to keep the heap at `n` elements
|
|
||||||
out.pushpop(item)
|
|
||||||
|
|
||||||
class KdSplitNode[T]:
|
|
||||||
def closest(
|
|
||||||
self,
|
|
||||||
point: Point,
|
|
||||||
out: MaxHeap[ClosestPoint[T]],
|
|
||||||
n: int,
|
|
||||||
bounds: AABB,
|
|
||||||
) -> None:
|
|
||||||
index = self._index(point)
|
|
||||||
children_bounds = bounds.split(self.axis, self.mid)
|
|
||||||
# Iterate over the child which contains the point, then its neighbour
|
|
||||||
for i in (index, 1 - index):
|
|
||||||
child, bounds = self.children[i], children_bounds[i]
|
|
||||||
# `min_dist` is 0 for the first child, and the minimum distance of
|
|
||||||
# all points contained in the second child
|
|
||||||
min_dist = bounds.dist_to_point(point)
|
|
||||||
# If the heap is at capacity and the child to inspect too far, stop
|
|
||||||
if len(out) == n and min_dist > out.peek().distance:
|
|
||||||
return
|
|
||||||
# Otherwise, recurse
|
|
||||||
child.closest(point, out, n, bounds)
|
|
||||||
```
|
|
|
@ -1,112 +0,0 @@
|
||||||
---
|
|
||||||
title: "Kd Tree Revisited"
|
|
||||||
date: 2024-08-17T14:20:22+01:00
|
|
||||||
draft: false # I don't care for draft mode, git has branches for that
|
|
||||||
description: "Simplifying the nearest neighbour search"
|
|
||||||
tags:
|
|
||||||
- algorithms
|
|
||||||
- data structures
|
|
||||||
- python
|
|
||||||
categories:
|
|
||||||
- programming
|
|
||||||
series:
|
|
||||||
- Cool algorithms
|
|
||||||
favorite: false
|
|
||||||
disable_feed: false
|
|
||||||
---
|
|
||||||
|
|
||||||
After giving it a bit of thought, I've found a way to simplify the nearest
|
|
||||||
neighbour search (i.e: the `closest` method) for the `KdTree` I implemented in
|
|
||||||
[my previous post]({{< relref "../2024-08-10-kd-tree/index.md" >}}).
|
|
||||||
|
|
||||||
<!--more-->
|
|
||||||
|
|
||||||
## The improvement
|
|
||||||
|
|
||||||
That post implemented the nearest neighbour search by keeping track of the
|
|
||||||
tree's boundaries (through `AABB`), and each of its sub-trees (through
|
|
||||||
`AABB.split`), and testing for the early exit condition by computing the
|
|
||||||
distance of the search's origin to each sub-tree's boundaries.
|
|
||||||
|
|
||||||
Instead of _explicitly_ keeping track of each sub-tree's boundaries, we can
|
|
||||||
implicitly compute it when recursing down the tree.
|
|
||||||
|
|
||||||
To check for the distance between the queried point and the splitting plane of
|
|
||||||
inner nodes: we simply need to project the origin onto that plane, thus giving
|
|
||||||
us a minimal bound on the distance of the points stored on the other side.
|
|
||||||
|
|
||||||
This can be easily computed from the `axis` and `mid` values which are stored in
|
|
||||||
the inner nodes: to project the origin onto the plane we simply replace its
|
|
||||||
coordinate for this axis by `mid`.
|
|
||||||
|
|
||||||
## Simplified search
|
|
||||||
|
|
||||||
With that out of the way, let's now see how `closest` can be implemented without
|
|
||||||
needing to track the tree's `AABB` at the root:
|
|
||||||
|
|
||||||
```python
|
|
||||||
# Wrapper type for closest points, ordered by `distance`
|
|
||||||
@dataclasses.dataclass(order=True)
|
|
||||||
class ClosestPoint[T]:
|
|
||||||
point: Point = field(compare=False)
|
|
||||||
value: T = field(compare=False)
|
|
||||||
distance: float
|
|
||||||
|
|
||||||
class KdTree[T]:
|
|
||||||
def closest(self, point: Point, n: int = 1) -> list[ClosestPoint[T]]:
|
|
||||||
assert n > 0
|
|
||||||
res = MaxHeap()
|
|
||||||
# Instead of passing an `AABB`, we give an initial projection point,
|
|
||||||
# the query origin itself (since we haven't visited any split node yet)
|
|
||||||
self._root.closest(point, res, n, point)
|
|
||||||
return sorted(res)
|
|
||||||
|
|
||||||
class KdNode[T]:
|
|
||||||
def closest(
|
|
||||||
self,
|
|
||||||
point: Point,
|
|
||||||
out: MaxHeap[ClosestPoint[T]],
|
|
||||||
n: int,
|
|
||||||
projection: Point,
|
|
||||||
) -> None:
|
|
||||||
# Same implementation
|
|
||||||
self.inner.closest(point, out, n, projection)
|
|
||||||
|
|
||||||
class KdLeafNode[T]:
|
|
||||||
def closest(
|
|
||||||
self,
|
|
||||||
point: Point,
|
|
||||||
out: MaxHeap[ClosestPoint[T]],
|
|
||||||
n: int,
|
|
||||||
projection: Point,
|
|
||||||
) -> None:
|
|
||||||
# Same implementation
|
|
||||||
for p, val in self.points.items():
|
|
||||||
item = ClosestPoint(p, val, dist(p, point))
|
|
||||||
if len(out) < n:
|
|
||||||
out.push(item)
|
|
||||||
elif out.peek().distance > item.distance:
|
|
||||||
out.pushpop(item)
|
|
||||||
|
|
||||||
class KdSplitNode[T]:
|
|
||||||
def closest(
|
|
||||||
self,
|
|
||||||
point: Point,
|
|
||||||
out: MaxHeap[ClosestPoint[T]],
|
|
||||||
n: int,
|
|
||||||
projection: Point,
|
|
||||||
) -> None:
|
|
||||||
index = self._index(point)
|
|
||||||
self.children[index].closest(point, out, n, projection)
|
|
||||||
# Project onto the splitting plane, for a minimum distance to its points
|
|
||||||
projection = projection.replace(self.axis, self.mid)
|
|
||||||
# If we're at capacity and can't possibly find any closer points, exit
|
|
||||||
if len(out) == n and dist(point, projection) > out.peek().distance:
|
|
||||||
return
|
|
||||||
# Otherwise recurse on the other side to check for nearer neighbours
|
|
||||||
self.children[1 - index].closest(point, out, n, projection)
|
|
||||||
```
|
|
||||||
|
|
||||||
As you can see, the main difference is in `KdSplitNode`'s implementation, where
|
|
||||||
we can quickly compute the minimum distance between the search's origin and all
|
|
||||||
potential points in that subspace.
|
|
|
@ -16,7 +16,7 @@ favorite: false
|
||||||
The flyweight is a well-known
|
The flyweight is a well-known
|
||||||
[GoF](https://en.wikipedia.org/wiki/Design_Patterns) design pattern.
|
[GoF](https://en.wikipedia.org/wiki/Design_Patterns) design pattern.
|
||||||
|
|
||||||
Its intent is to minimize memory usage by reducing the number of instantiations
|
It's intent is to minimize memory usage by reducing the number of instantiations
|
||||||
of a given object.
|
of a given object.
|
||||||
|
|
||||||
I will show you how to implement a robust flyweight in C++, as well as a way to
|
I will show you how to implement a robust flyweight in C++, as well as a way to
|
|
@ -2,12 +2,14 @@
|
||||||
title: "Git Basics"
|
title: "Git Basics"
|
||||||
date: 2020-12-07 18:54:31+0100
|
date: 2020-12-07 18:54:31+0100
|
||||||
draft: false # I don't care for draft mode, git has branches for that
|
draft: false # I don't care for draft mode, git has branches for that
|
||||||
description: "The next step after the basics"
|
description: ""
|
||||||
tags:
|
tags:
|
||||||
- git
|
- git
|
||||||
- cli
|
- cli
|
||||||
categories:
|
categories:
|
||||||
- programming
|
- programming
|
||||||
|
series:
|
||||||
|
- Git basics
|
||||||
favorite: false
|
favorite: false
|
||||||
---
|
---
|
||||||
|
|
||||||
|
@ -134,7 +136,7 @@ branch.
|
||||||
#### Fixup, a practical example
|
#### Fixup, a practical example
|
||||||
|
|
||||||
A specific kind of squashing which I use frequently is the notion of `fixup`s.
|
A specific kind of squashing which I use frequently is the notion of `fixup`s.
|
||||||
Say you've committed a change (*A*), and later on notice that it is missing
|
Say you've commited a change (*A*), and later on notice that it is missing
|
||||||
a part of the changeset. You can decide to commit that missing part (*A-bis*)
|
a part of the changeset. You can decide to commit that missing part (*A-bis*)
|
||||||
and annotate it to mean that it is linked to *A*.
|
and annotate it to mean that it is linked to *A*.
|
||||||
|
|
||||||
|
@ -184,7 +186,7 @@ After applying the rebase, you find yourself with the complete change inside
|
||||||
|
|
||||||
This is especially useful when you want to apply suggestion on a merge request
|
This is especially useful when you want to apply suggestion on a merge request
|
||||||
after it was reviewed. You can keep a clean history without those pesky `Apply
|
after it was reviewed. You can keep a clean history without those pesky `Apply
|
||||||
suggestion ...` commits being part of your history.
|
suggestion ...` commmits being part of your history.
|
||||||
|
|
||||||
### Lost commits and the reflog
|
### Lost commits and the reflog
|
||||||
|
|
||||||
|
@ -315,7 +317,7 @@ easily choose which parts of your changes should end up in the same commit.
|
||||||
Here's a list of commands that you should read-up on, but I won't be presenting
|
Here's a list of commands that you should read-up on, but I won't be presenting
|
||||||
further:
|
further:
|
||||||
|
|
||||||
* `git bisect`
|
* `git bissect`
|
||||||
* `git rerere`
|
* `git rerere`
|
||||||
* `git stash`
|
* `git stash`
|
||||||
* and more...
|
* and more...
|
|
@ -8,8 +8,6 @@ tags:
|
||||||
categories:
|
categories:
|
||||||
favorite: false
|
favorite: false
|
||||||
tikz: true
|
tikz: true
|
||||||
graphviz: true
|
|
||||||
mermaid: true
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Test post please ignore
|
## Test post please ignore
|
||||||
|
@ -42,29 +40,6 @@ echo hello world | cut -d' ' -f 1
|
||||||
\end{tikzpicture}
|
\end{tikzpicture}
|
||||||
{{% /tikz %}}
|
{{% /tikz %}}
|
||||||
|
|
||||||
### Graphviz support
|
|
||||||
|
|
||||||
{{% graphviz %}}
|
|
||||||
graph {
|
|
||||||
a -- b
|
|
||||||
b -- c
|
|
||||||
c -- a
|
|
||||||
}
|
|
||||||
{{% /graphviz %}}
|
|
||||||
|
|
||||||
### Mermaid support
|
|
||||||
|
|
||||||
{{% mermaid %}}
|
|
||||||
graph TD
|
|
||||||
A[Enter Chart Definition] --> B(Preview)
|
|
||||||
B --> C{decide}
|
|
||||||
C --> D[Keep]
|
|
||||||
C --> E[Edit Definition]
|
|
||||||
E --> B
|
|
||||||
D --> F[Save Image and Code]
|
|
||||||
F --> B
|
|
||||||
{{% /mermaid %}}
|
|
||||||
|
|
||||||
### Spoilers
|
### Spoilers
|
||||||
|
|
||||||
{{% spoiler "Don't open me" %}}
|
{{% spoiler "Don't open me" %}}
|
|
@ -14,8 +14,8 @@ favorite: false
|
||||||
---
|
---
|
||||||
|
|
||||||
Coming back from our last post about [generic flyweights in C++]({{< relref
|
Coming back from our last post about [generic flyweights in C++]({{< relref
|
||||||
"../2020-07-16-generic-flyweight-cpp/index.md" >}}), we can write a flyweight
|
"generic-flyweight-cpp.md" >}}), we can write a flyweight that can be used with
|
||||||
that can be used with any abstract base classes.
|
any abstract base classes.
|
||||||
|
|
||||||
<!--more-->
|
<!--more-->
|
||||||
|
|
||||||
|
@ -68,7 +68,7 @@ public:
|
||||||
const std::type_index lhs_i(lhs);
|
const std::type_index lhs_i(lhs);
|
||||||
const std::type_index rhs_i(rhs);
|
const std::type_index rhs_i(rhs);
|
||||||
if (lhs_i != rhs_i)
|
if (lhs_i != rhs_i)
|
||||||
return lhs_i < rhs_i;
|
returh lhs_i < rhs_i;
|
||||||
// We are now assured that both classes have the same type
|
// We are now assured that both classes have the same type
|
||||||
return less_than(rhs);
|
return less_than(rhs);
|
||||||
}
|
}
|
|
@ -8,19 +8,7 @@ disable_feed: true
|
||||||
A few of my Amazon wish lists in case you want to give me a gift.
|
A few of my Amazon wish lists in case you want to give me a gift.
|
||||||
|
|
||||||
* [Wish list](https://www.amazon.fr/hz/wishlist/ls/1FT0IO9JJTX57)
|
* [Wish list](https://www.amazon.fr/hz/wishlist/ls/1FT0IO9JJTX57)
|
||||||
* ~~[Board games](https://www.amazon.fr/hz/wishlist/ls/2NY50W36THGMW)~~
|
* [Board games](https://www.amazon.fr/hz/wishlist/ls/2NY50W36THGMW)
|
||||||
* See the [Board Game Geek list](https://boardgamegeek.com/wishlist/Ambroisie)
|
|
||||||
which is better curated and more up-to-date
|
|
||||||
* [Cooking & kitchenware](https://www.amazon.fr/hz/wishlist/ls/2MNRCLPNABZSU)
|
* [Cooking & kitchenware](https://www.amazon.fr/hz/wishlist/ls/2MNRCLPNABZSU)
|
||||||
* [Data Structures & Algorithms](https://www.amazon.fr/hz/wishlist/ls/2XZPQSBOGOFC3)
|
* [Data Structures & Algorithms](https://www.amazon.fr/hz/wishlist/ls/2XZPQSBOGOFC3)
|
||||||
* [Programming](https://www.amazon.fr/hz/wishlist/ls/1R4KFV4H2D8IF)
|
* [Programming](https://www.amazon.fr/hz/wishlist/ls/1R4KFV4H2D8IF)
|
||||||
* Other items:
|
|
||||||
* [Chef's presses](https://www.thechefspress.com/shop)
|
|
||||||
* [UK link](https://www.kitchenprovisions.co.uk/products/the-chefs-press-8oz-13oz)
|
|
||||||
* [FR link](https://www.thekitchenlab.fr/fr/p/la-presse-du-chef-poids-de-friture-bruce-hill-13-oz/)
|
|
||||||
* [Combustion Inc thermometer and
|
|
||||||
display](https://combustion.inc/products/predictive-thermometer-gen2-wifi-display)
|
|
||||||
* [Get the one with the range extender if you *really* want to spoil
|
|
||||||
me](https://combustion.inc/products/predictive-thermometer-gen2-wifi-booster-wifi-display)
|
|
||||||
* [Cannelés
|
|
||||||
molds](https://www.laboetgato.fr/en/moules-a-canneles/13964-mould-for-canneles-non-polished-copper-o-45-mm-3333331010026.html)
|
|
||||||
|
|
83
flake.lock
83
flake.lock
|
@ -1,68 +1,28 @@
|
||||||
{
|
{
|
||||||
"nodes": {
|
"nodes": {
|
||||||
"flake-compat": {
|
|
||||||
"flake": false,
|
|
||||||
"locked": {
|
|
||||||
"lastModified": 1696426674,
|
|
||||||
"narHash": "sha256-kvjfFW7WAETZlt09AgDn1MrtKzP7t90Vf7vypd3OL1U=",
|
|
||||||
"owner": "edolstra",
|
|
||||||
"repo": "flake-compat",
|
|
||||||
"rev": "0f9255e01c2351cc7d116c072cb317785dd33b33",
|
|
||||||
"type": "github"
|
|
||||||
},
|
|
||||||
"original": {
|
|
||||||
"owner": "edolstra",
|
|
||||||
"repo": "flake-compat",
|
|
||||||
"type": "github"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"futils": {
|
"futils": {
|
||||||
"inputs": {
|
|
||||||
"systems": "systems"
|
|
||||||
},
|
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1710146030,
|
"lastModified": 1622445595,
|
||||||
"narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=",
|
"narHash": "sha256-m+JRe6Wc5OZ/mKw2bB3+Tl0ZbtyxxxfnAWln8Q5qs+Y=",
|
||||||
"owner": "numtide",
|
"owner": "numtide",
|
||||||
"repo": "flake-utils",
|
"repo": "flake-utils",
|
||||||
"rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a",
|
"rev": "7d706970d94bc5559077eb1a6600afddcd25a7c8",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
"owner": "numtide",
|
"owner": "numtide",
|
||||||
"ref": "main",
|
"ref": "master",
|
||||||
"repo": "flake-utils",
|
"repo": "flake-utils",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"gitignore": {
|
|
||||||
"inputs": {
|
|
||||||
"nixpkgs": [
|
|
||||||
"pre-commit-hooks",
|
|
||||||
"nixpkgs"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"locked": {
|
|
||||||
"lastModified": 1709087332,
|
|
||||||
"narHash": "sha256-HG2cCnktfHsKV0s4XW83gU3F57gaTljL9KNSuG6bnQs=",
|
|
||||||
"owner": "hercules-ci",
|
|
||||||
"repo": "gitignore.nix",
|
|
||||||
"rev": "637db329424fd7e46cf4185293b9cc8c88c95394",
|
|
||||||
"type": "github"
|
|
||||||
},
|
|
||||||
"original": {
|
|
||||||
"owner": "hercules-ci",
|
|
||||||
"repo": "gitignore.nix",
|
|
||||||
"type": "github"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nixpkgs": {
|
"nixpkgs": {
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1722415718,
|
"lastModified": 1628320020,
|
||||||
"narHash": "sha256-5US0/pgxbMksF92k1+eOa8arJTJiPvsdZj9Dl+vJkM4=",
|
"narHash": "sha256-4xBEb+TOHyIGpK37EVsZx6dGPwNMf5YWNBJaQ4VyZws=",
|
||||||
"owner": "NixOS",
|
"owner": "NixOS",
|
||||||
"repo": "nixpkgs",
|
"repo": "nixpkgs",
|
||||||
"rev": "c3392ad349a5227f4a3464dce87bcc5046692fce",
|
"rev": "67c80531be622641b5b2ccc3a7aff355cb02476b",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
|
@ -74,21 +34,19 @@
|
||||||
},
|
},
|
||||||
"pre-commit-hooks": {
|
"pre-commit-hooks": {
|
||||||
"inputs": {
|
"inputs": {
|
||||||
"flake-compat": "flake-compat",
|
"flake-utils": [
|
||||||
"gitignore": "gitignore",
|
"futils"
|
||||||
"nixpkgs": [
|
|
||||||
"nixpkgs"
|
|
||||||
],
|
],
|
||||||
"nixpkgs-stable": [
|
"nixpkgs": [
|
||||||
"nixpkgs"
|
"nixpkgs"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1721042469,
|
"lastModified": 1621411868,
|
||||||
"narHash": "sha256-6FPUl7HVtvRHCCBQne7Ylp4p+dpP3P/OYuzjztZ4s70=",
|
"narHash": "sha256-R+7OQ2JYFCb3E7Jl7LhRifzMVCR6Gl8R98zYsNhZtJ8=",
|
||||||
"owner": "cachix",
|
"owner": "cachix",
|
||||||
"repo": "pre-commit-hooks.nix",
|
"repo": "pre-commit-hooks.nix",
|
||||||
"rev": "f451c19376071a90d8c58ab1a953c6e9840527fd",
|
"rev": "2e7fac06108b4fc81f5ff9ed9a02bc4f6ede7001",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
|
@ -104,21 +62,6 @@
|
||||||
"nixpkgs": "nixpkgs",
|
"nixpkgs": "nixpkgs",
|
||||||
"pre-commit-hooks": "pre-commit-hooks"
|
"pre-commit-hooks": "pre-commit-hooks"
|
||||||
}
|
}
|
||||||
},
|
|
||||||
"systems": {
|
|
||||||
"locked": {
|
|
||||||
"lastModified": 1681028828,
|
|
||||||
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
|
|
||||||
"owner": "nix-systems",
|
|
||||||
"repo": "default",
|
|
||||||
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
|
|
||||||
"type": "github"
|
|
||||||
},
|
|
||||||
"original": {
|
|
||||||
"owner": "nix-systems",
|
|
||||||
"repo": "default",
|
|
||||||
"type": "github"
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"root": "root",
|
"root": "root",
|
||||||
|
|
20
flake.nix
20
flake.nix
|
@ -6,7 +6,7 @@
|
||||||
type = "github";
|
type = "github";
|
||||||
owner = "numtide";
|
owner = "numtide";
|
||||||
repo = "flake-utils";
|
repo = "flake-utils";
|
||||||
ref = "main";
|
ref = "master";
|
||||||
};
|
};
|
||||||
|
|
||||||
nixpkgs = {
|
nixpkgs = {
|
||||||
|
@ -22,8 +22,8 @@
|
||||||
repo = "pre-commit-hooks.nix";
|
repo = "pre-commit-hooks.nix";
|
||||||
ref = "master";
|
ref = "master";
|
||||||
inputs = {
|
inputs = {
|
||||||
|
flake-utils.follows = "futils";
|
||||||
nixpkgs.follows = "nixpkgs";
|
nixpkgs.follows = "nixpkgs";
|
||||||
nixpkgs-stable.follows = "nixpkgs";
|
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
@ -61,17 +61,15 @@
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
devShells = {
|
devShell = pkgs.mkShell {
|
||||||
default = pkgs.mkShell {
|
name = "blog";
|
||||||
name = "blog";
|
|
||||||
|
|
||||||
buildInputs = with pkgs; [
|
buildInputs = with pkgs; [
|
||||||
gnumake
|
gnumake
|
||||||
hugo
|
hugo
|
||||||
];
|
];
|
||||||
|
|
||||||
inherit (self.checks.${system}.pre-commit) shellHook;
|
inherit (self.checks.${system}.pre-commit) shellHook;
|
||||||
};
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
series:
|
serie:
|
||||||
other: "series"
|
other: "serie"
|
||||||
|
|
||||||
Series:
|
Series:
|
||||||
other: "Series"
|
other: "Series"
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
series:
|
serie:
|
||||||
other: "série"
|
other: "série"
|
||||||
|
|
||||||
Series:
|
Series:
|
||||||
|
|
|
@ -11,7 +11,6 @@
|
||||||
<a data-hint="Sourcehut" title="Sourcehut" href="https://sr.ht/~{{ .Site.Author.sourcehut }}" target="_blank" rel="me"> Sourcehut </a>
|
<a data-hint="Sourcehut" title="Sourcehut" href="https://sr.ht/~{{ .Site.Author.sourcehut }}" target="_blank" rel="me"> Sourcehut </a>
|
||||||
<a data-hint="LinkedIn" title="LinkedIn" href="https://www.linkedin.com/in/{{ .Site.Author.linkedin }}" target="_blank" rel="me"> LinkedIn </a>
|
<a data-hint="LinkedIn" title="LinkedIn" href="https://www.linkedin.com/in/{{ .Site.Author.linkedin }}" target="_blank" rel="me"> LinkedIn </a>
|
||||||
<a data-hint="Matrix" title="Matrix" href="https://matrix.to/#/{{ .Site.Author.matrix }}" target="_blank" rel="me"> Matrix </a>
|
<a data-hint="Matrix" title="Matrix" href="https://matrix.to/#/{{ .Site.Author.matrix }}" target="_blank" rel="me"> Matrix </a>
|
||||||
<a data-hint="Mastodon" title="Mastodon" href="https://{{ .Site.Author.mastodon }}" target="_blank" rel="me"> Mastodon </a>
|
|
||||||
<a rel="pgpkey" href="https://key.belanyi.fr/key.pgp"> PGP </a>
|
<a rel="pgpkey" href="https://key.belanyi.fr/key.pgp"> PGP </a>
|
||||||
<link rel="authorization_endpoint" href="https://indieauth.com/auth">
|
<link rel="authorization_endpoint" href="https://indieauth.com/auth">
|
||||||
<p>
|
<p>
|
||||||
|
|
|
@ -3,30 +3,6 @@
|
||||||
<link rel="stylesheet" type="text/css" href="https://tikzjax.com/v1/fonts.css">
|
<link rel="stylesheet" type="text/css" href="https://tikzjax.com/v1/fonts.css">
|
||||||
<script async src="https://tikzjax.com/v1/tikzjax.js"></script>
|
<script async src="https://tikzjax.com/v1/tikzjax.js"></script>
|
||||||
{{ end }}
|
{{ end }}
|
||||||
<!-- Graphviz support -->
|
|
||||||
{{ if (.Params.graphviz) }}
|
|
||||||
<script src="https://cdn.jsdelivr.net/npm/@viz-js/viz@3.7.0/lib/viz-standalone.min.js"></script>
|
|
||||||
<script type="text/javascript">
|
|
||||||
(function() {
|
|
||||||
Viz.instance().then(function(viz) {
|
|
||||||
Array.prototype.forEach.call(document.querySelectorAll("pre.graphviz"), function(x) {
|
|
||||||
var svg = viz.renderSVGElement(x.innerText);
|
|
||||||
// Let CSS take care of the SVG size
|
|
||||||
svg.removeAttribute("width")
|
|
||||||
svg.setAttribute("height", "auto")
|
|
||||||
x.replaceChildren(svg)
|
|
||||||
})
|
|
||||||
})
|
|
||||||
})();
|
|
||||||
</script>
|
|
||||||
{{ end }}
|
|
||||||
<!-- Mermaid support -->
|
|
||||||
{{ if (.Params.mermaid) }}
|
|
||||||
<script type="module" async>
|
|
||||||
import mermaid from "https://cdn.jsdelivr.net/npm/mermaid@latest/dist/mermaid.esm.min.mjs";
|
|
||||||
mermaid.initialize({ startOnLoad: true });
|
|
||||||
</script>
|
|
||||||
{{ end }}
|
|
||||||
{{ with .OutputFormats.Get "atom" -}}
|
{{ with .OutputFormats.Get "atom" -}}
|
||||||
{{ printf `<link rel="%s" type="%s" href="%s" title="%s" />` .Rel .MediaType.Type .Permalink $.Site.Title | safeHTML }}
|
{{ printf `<link rel="%s" type="%s" href="%s" title="%s" />` .Rel .MediaType.Type .Permalink $.Site.Title | safeHTML }}
|
||||||
{{ end -}}
|
{{ end -}}
|
||||||
|
|
|
@ -1,16 +0,0 @@
|
||||||
<pre class="graphviz">
|
|
||||||
{{ with .Get "file" }}
|
|
||||||
{{ if eq (. | printf "%.1s") "/" }}
|
|
||||||
{{/* Absolute paths are from the root of the site. */}}
|
|
||||||
{{ $.Scratch.Set "filepath" . }}
|
|
||||||
{{ else }}
|
|
||||||
{{/* Relative paths are from page directory. */}}
|
|
||||||
{{ $.Scratch.Set "filepath" $.Page.File.Dir }}
|
|
||||||
{{ $.Scratch.Add "filepath" . }}
|
|
||||||
{{ end }}
|
|
||||||
|
|
||||||
{{ $.Scratch.Get "filepath" | readFile }}
|
|
||||||
{{ else }}
|
|
||||||
{{.Inner}}
|
|
||||||
{{ end }}
|
|
||||||
</pre>
|
|
|
@ -1,16 +0,0 @@
|
||||||
<pre class="mermaid">
|
|
||||||
{{ with .Get "file" }}
|
|
||||||
{{ if eq (. | printf "%.1s") "/" }}
|
|
||||||
{{/* Absolute paths are from the root of the site. */}}
|
|
||||||
{{ $.Scratch.Set "filepath" . }}
|
|
||||||
{{ else }}
|
|
||||||
{{/* Relative paths are from page directory. */}}
|
|
||||||
{{ $.Scratch.Set "filepath" $.Page.File.Dir }}
|
|
||||||
{{ $.Scratch.Add "filepath" . }}
|
|
||||||
{{ end }}
|
|
||||||
|
|
||||||
{{ $.Scratch.Get "filepath" | readFile }}
|
|
||||||
{{ else }}
|
|
||||||
{{.Inner}}
|
|
||||||
{{ end }}
|
|
||||||
</pre>
|
|
|
@ -1,16 +1,3 @@
|
||||||
<script type="text/tikz">
|
<script type="text/tikz">
|
||||||
{{ with .Get "file" }}
|
{{.Inner}}
|
||||||
{{ if eq (. | printf "%.1s") "/" }}
|
|
||||||
{{/* Absolute paths are from the root of the site. */}}
|
|
||||||
{{ $.Scratch.Set "filepath" . }}
|
|
||||||
{{ else }}
|
|
||||||
{{/* Relative paths are from page directory. */}}
|
|
||||||
{{ $.Scratch.Set "filepath" $.Page.File.Dir }}
|
|
||||||
{{ $.Scratch.Add "filepath" . }}
|
|
||||||
{{ end }}
|
|
||||||
|
|
||||||
{{ $.Scratch.Get "filepath" | readFile }}
|
|
||||||
{{ else }}
|
|
||||||
{{.Inner}}
|
|
||||||
{{ end }}
|
|
||||||
</script>
|
</script>
|
||||||
|
|
|
@ -1 +1 @@
|
||||||
Subproject commit d77e0d6d552ad135138f1f9443ef30cc5af8f0f3
|
Subproject commit 5dab60e04a37896c09a32137aefe821c63b3af04
|
|
@ -1 +1 @@
|
||||||
Subproject commit d545effed9949bf834eaed09ad423ec3e030794f
|
Subproject commit 5da913dc46d2dadcaf6548256238c58c504476de
|
Loading…
Reference in a new issue