Compare commits
1 commit
main
...
ee916a506b
Author | SHA1 | Date | |
---|---|---|---|
Bruno BELANYI | ee916a506b |
64
.drone.jsonnet
Normal file
64
.drone.jsonnet
Normal file
|
@ -0,0 +1,64 @@
|
|||
# Build one Drone pipeline description for the blog.
#
# isDev selects the flavour:
#   - true:  "deploy-dev", triggered by every branch except "main"; the site is
#            built with drafts and future posts for https://dev.belanyi.fr and
#            uploaded to the target held in the "ssh_target_dev" secret.
#   - false: "deploy-prod", triggered by "main" only; the site is built with
#            HUGO_ENV=production and uploaded to the "ssh_target" secret.
local Pipeline(isDev) = {
    kind: "pipeline",
    name: if isDev then "deploy-dev" else "deploy-prod",
    # Dev ignores "main", prod only triggers on "main"
    trigger: { branch: { [if isDev then "exclude" else "include"]: [ "main" ] } },
    # We want to clone the submodules, which isn't done by default
    clone: { disable: true },
    steps: [
        {
            name: "clone",
            image: "plugins/git",
            # NOTE(review): `recursive` sits at the step level rather than
            # under `settings` -- confirm the plugin picks it up.
            recursive: true,
        },
        {
            name: "markdownlint",
            image: "06kellyjac/markdownlint-cli",
            commands: [
                "markdownlint --version",
                "markdownlint content/",
            ],
        },
        {
            name: "build",
            image: "klakegg/hugo",
            commands: [
                "hugo version",
                # If dev, include drafts and future articles, change base URL
                "hugo --minify" + if isDev then " -D -F -b https://dev.belanyi.fr" else "",
            ],
            # The key evaluates to null for dev builds, which omits the field
            [if !isDev then "environment"]: { HUGO_ENV: "production" }
        },
        {
            name: "deploy",
            image: "appleboy/drone-scp",
            settings: {
                source: "public/*",
                strip_components: 1, # Remove 'public/' prefix from file paths
                rm: true, # Remove previous files from target directory
                host: { from_secret: "ssh_host" },
                target: { from_secret: "ssh_target" + if isDev then "_dev" else "" },
                username: { from_secret: "ssh_user" },
                key: { from_secret: "ssh_key" },
                port: { from_secret: "ssh_port" },
            },
        },
        {
            name: "notify",
            image: "plugins/matrix",
            settings: {
                homeserver: { from_secret: "matrix_homeserver" },
                roomid: { from_secret: "matrix_roomid" },
                username: { from_secret: "matrix_username" },
                password: { from_secret: "matrix_password" },
            },
            # Step conditions are spelled `when`; `trigger` is a pipeline-level
            # key. Listing both statuses makes the step run after a failure too.
            when: { status: [ "failure", "success", ] },
        },
    ]
};
|
||||
|
||||
|
||||
# Emit both pipelines: the production one first, then the development one.
[ Pipeline(isDev) for isDev in [ false, true ] ]
|
7
.envrc
7
.envrc
|
@ -1 +1,8 @@
|
|||
# Register flake.nix and flake.lock with direnv's `watch_file`, then evaluate
# the output of `nix print-dev-env` in the current shell.
use_flake() {
  local tracked
  for tracked in flake.nix flake.lock; do
    watch_file "$tracked"
  done
  eval "$(nix print-dev-env)"
}
|
||||
|
||||
use flake
|
||||
eval "$shellHooks"
|
||||
|
|
|
@ -1,3 +0,0 @@
|
|||
# MD024/no-duplicate-heading/no-duplicate-header
|
||||
MD024:
|
||||
siblings_only: true
|
|
@ -1,64 +0,0 @@
|
|||
labels:
|
||||
backend: local
|
||||
|
||||
matrix:
|
||||
include:
|
||||
- TYPE: dev
|
||||
MAKE_TARGET: build-dev
|
||||
SSH_TARGET: ssh_target_dev
|
||||
- TYPE: prod
|
||||
MAKE_TARGET: build-prod
|
||||
SSH_TARGET: ssh_target
|
||||
|
||||
# Run the correct matrix build on the correct branch
|
||||
when:
|
||||
evaluate: |
|
||||
((CI_COMMIT_BRANCH == CI_REPO_DEFAULT_BRANCH) == ("${TYPE}" == "prod"))
|
||||
|
||||
steps:
|
||||
- name: check
|
||||
image: bash
|
||||
commands:
|
||||
- nix flake check
|
||||
|
||||
- name: build (${TYPE})
|
||||
image: bash
|
||||
commands:
|
||||
# If dev, include drafts and future articles, change base URL
|
||||
- nix develop -c make ${MAKE_TARGET}
|
||||
|
||||
- name: deploy (${TYPE})
|
||||
image: bash
|
||||
environment:
|
||||
# Trailing slash to synchronize the folder's *content* to the target
|
||||
SYNC_SOURCE: public/
|
||||
SYNC_KEY:
|
||||
from_secret: ssh_key
|
||||
SYNC_PORT:
|
||||
from_secret: ssh_port
|
||||
SYNC_TARGET:
|
||||
from_secret: ${SSH_TARGET}
|
||||
SYNC_USERNAME:
|
||||
from_secret: ssh_user
|
||||
SYNC_HOST:
|
||||
from_secret: ssh_host
|
||||
commands:
|
||||
- "nix run github:ambroisie/nix-config#drone-rsync"
|
||||
|
||||
- name: notify
|
||||
image: bash
|
||||
environment:
|
||||
ADDRESS:
|
||||
from_secret: matrix_homeserver
|
||||
ROOM:
|
||||
from_secret: matrix_roomid
|
||||
USER:
|
||||
from_secret: matrix_username
|
||||
PASS:
|
||||
from_secret: matrix_password
|
||||
commands:
|
||||
- nix run github:ambroisie/matrix-notifier
|
||||
when:
|
||||
status:
|
||||
- failure
|
||||
- success
|
7
Makefile
7
Makefile
|
@ -3,7 +3,7 @@ all: build-dev
|
|||
|
||||
.PHONY: build-dev
|
||||
build-dev:
|
||||
HUGO_TITLE="Ambroisie's dev blog" HUGO_BASEURL=https://dev.belanyi.fr hugo -D -F
|
||||
HUGO_BASEURL=https://dev.belanyi.fr hugo -D -F
|
||||
|
||||
.PHONY: build-prod
|
||||
build-prod:
|
||||
|
@ -13,6 +13,11 @@ build-prod:
|
|||
serve:
|
||||
hugo server -D -F
|
||||
|
||||
deploy:
|
||||
@if [ -n "$$KEY" ]; then eval "$$(ssh-agent)"; echo "$$KEY" | ssh-add -; fi
|
||||
if [ -z "$$USERNAME" ] || [ -z "$$SSH_HOST" ] || [ -z "$$TARGET" ]; then exit 1; fi
|
||||
rsync --progress -avz --delete public/ "$$USERNAME@$$SSH_HOST:$$TARGET"
|
||||
|
||||
.PHONY: clean
|
||||
clean:
|
||||
$(RM) -r public
|
||||
|
|
|
@ -5,18 +5,15 @@ draft: false # I don't care for draft mode, git has branches for that
|
|||
description: ""
|
||||
tags:
|
||||
- accounting
|
||||
- algorithms
|
||||
- c++
|
||||
- ci/cd
|
||||
- cli
|
||||
- data structures
|
||||
- design-pattern
|
||||
- docker
|
||||
- drone
|
||||
- git
|
||||
- hugo
|
||||
- nix
|
||||
- python
|
||||
- self-hosting
|
||||
- test
|
||||
categories:
|
||||
|
|
16
config.yaml
16
config.yaml
|
@ -6,6 +6,8 @@ theme:
|
|||
- "hugo-atom-feed"
|
||||
- "anubis"
|
||||
paginate: 5
|
||||
disqusShortname: ""
|
||||
googleAnalytics: ""
|
||||
enableRobotsTXT: true
|
||||
enableEmoji: true
|
||||
|
||||
|
@ -35,12 +37,11 @@ menu:
|
|||
author:
|
||||
name: "Bruno BELANYI"
|
||||
email: "contact-blog@belanyi.fr"
|
||||
github: "ambroisie"
|
||||
gitlab: "ambroisie"
|
||||
github: "Ambroisie"
|
||||
gitlab: "Ambroisie"
|
||||
sourcehut: "ambroisie"
|
||||
linkedin: "bruno-belanyi"
|
||||
matrix: "@ambroisie:belanyi.fr"
|
||||
mastodon: "nixos.paris/@ambroisie"
|
||||
|
||||
permalinks:
|
||||
posts: /:year/:month/:day/:title/
|
||||
|
@ -65,18 +66,11 @@ params:
|
|||
webmentions:
|
||||
login: belanyi.fr
|
||||
pingback: true
|
||||
mathjax: true
|
||||
|
||||
services:
|
||||
disqus:
|
||||
shortname: ""
|
||||
googleAnalytics:
|
||||
ID: ""
|
||||
|
||||
taxonomies:
|
||||
category: "categories"
|
||||
tag: "tags"
|
||||
series: "series"
|
||||
serie: "series"
|
||||
|
||||
markup:
|
||||
goldmark:
|
||||
|
|
|
@ -4,13 +4,7 @@ description: "About me"
|
|||
date: 2020-07-14
|
||||
---
|
||||
|
||||
I'm currently working as a Senior Software Engineer at [Google][google], as part
|
||||
of their Embedded Graphics Drivers team for Pixel devices.
|
||||
|
||||
[google]: https://www.linkedin.com/company/google/mycompany/verification/
|
||||
I'm a CS student at EPITA.
|
||||
|
||||
You can find my CV [here](https://cv.belanyi.fr/en.pdf). Or
|
||||
[here](https://cv.belanyi.fr/fr.pdf) for the french version.
|
||||
|
||||
If you are a friend or family, you might be looking for my [wish
|
||||
lists]({{< ref "wish-lists.md" >}}) to find some present ideas.
|
||||
|
|
|
@ -1,172 +0,0 @@
|
|||
---
|
||||
title: "Magic Conversions in C++"
|
||||
date: 2021-10-01T14:46:14+02:00
|
||||
draft: false # I don't care for draft mode, git has branches for that
|
||||
description: "How to get the compiler to infer the correct conversion"
|
||||
tags:
|
||||
- c++
|
||||
- design-pattern
|
||||
categories:
|
||||
- programming
|
||||
series:
|
||||
favorite: false
|
||||
disable_feed: false
|
||||
---
|
||||
|
||||
One feature that I like a lot in [Rust][rust-lang] is return type polymorphism,
|
||||
best exemplified with the following snippet of code:
|
||||
|
||||
```rust
|
||||
use std::collections::HashSet;
|
||||
|
||||
fn main() {
|
||||
let vec: Vec<_> = (0..10).filter(|a| a % 2 == 0).collect();
|
||||
let set: HashSet<_> = (0..10).filter(|a| a % 2 == 0).collect();
|
||||
println!("vec: {:?}", vec);
|
||||
println!("set: {:?}", set);
|
||||
}
|
||||
```
|
||||
|
||||
We have the same expression (`(0..10).filter(|a| a % 2 == 0).collect()`) that
|
||||
results in two totally different types of values (a `Vec` and a `HashSet`)!
|
||||
|
||||
This is because Rust allows you to write a function which is generic in its
|
||||
*return type*, which is a super-power that C++ does not have. But is there a way
|
||||
to emulate this behaviour with some clever code?
|
||||
|
||||
[rust-lang]: https://rust-lang.org/
|
||||
<!--more-->
|
||||
|
||||
## The problem
|
||||
|
||||
For the purposes of this article, the problem that I am trying to solve will be
|
||||
the following:
|
||||
|
||||
```c++
|
||||
void takes_small_array(std::array<char, 32> arr);
|
||||
void takes_big_array(std::array<char, 4096> arr);
|
||||
|
||||
// How to define a `to_array` function so that the following works?
|
||||
void test(std::string_view s) {
|
||||
takes_small_array(to_array(s));
|
||||
takes_big_array(to_array(s));
|
||||
}
|
||||
```
|
||||
|
||||
## First attempt
|
||||
|
||||
If we try to solve this in a way similar to Rust, we hit a problem in what the
|
||||
language allows us to write:
|
||||
|
||||
```c++
|
||||
std::array<char, 32> to_array(std::string_view s) {
|
||||
std::array<char, 32> ret;
|
||||
std::copy(s.begin(), s.end(), ret.begin());
|
||||
return ret;
|
||||
}
|
||||
|
||||
std::array<char, 4096> to_array(std::string_view s) {
|
||||
std::array<char, 4096> ret;
|
||||
std::copy(s.begin(), s.end(), ret.begin());
|
||||
return ret;
|
||||
}
|
||||
```
|
||||
|
||||
The compiler complains with the following error:
|
||||
|
||||
```none
|
||||
ambiguating new declaration of 'std::array<char, 4096> to_array(std::string_view)'
|
||||
note: old declaration 'std::array<char, 32> to_array(std::string_view)'
|
||||
```
|
||||
|
||||
That is because C++ does **not** allow you to write an overload set based on
|
||||
*return type only*.
|
||||
|
||||
## Using templates
|
||||
|
||||
For our second try, we want to use *non-type template parameters* to solve the
|
||||
issue. We write the following:
|
||||
|
||||
```c++
|
||||
template <size_t N>
|
||||
std::array<char, N> to_array(std::string_view s) {
|
||||
std::array<char, N> ret;
|
||||
std::copy(s.begin(), s.end(), ret.begin());
|
||||
return ret;
|
||||
}
|
||||
```
|
||||
|
||||
The compiler does not complain when we write this! We have also solved two minor
|
||||
issues with the previous try: the sizes of the arrays are not hard-coded, and we
|
||||
kept the code DRY.
|
||||
|
||||
However we have some trouble trying to use those functions as stated in the
|
||||
beginning of the problem, with the following error message:
|
||||
|
||||
```none
|
||||
error: no matching function for call to 'to_array(std::string_view&)'
|
||||
| takes_small_array(to_array(s));
|
||||
note: candidate: 'template<size_t N> std::array<char, N> to_array(std::string_view)'
|
||||
| std::array<char, N> to_array(std::string_view s) {
|
||||
note: template argument deduction/substitution failed:
|
||||
note: couldn't deduce template parameter 'N'
|
||||
```
|
||||
|
||||
The compiler cannot deduce the size of the array we want to use! We could solve
|
||||
the issue by explicitly giving a size when calling the function
|
||||
(`to_array<32>(s)`) however this is unsatisfactory: we are not solving the
|
||||
problem as stated initially, which could for example lead to needless churning
|
||||
if we change the signature of `takes_small_array` to instead use
|
||||
`std::array<char, 64>`.
|
||||
|
||||
Thankfully there is a way to use the compiler to our advantage, and have it
|
||||
deduce it for us, but it involves some trickery.
|
||||
|
||||
## The solution
|
||||
|
||||
We want to write a function that resolves the previous two issues we
|
||||
experienced:
|
||||
|
||||
* The non-type template parameter must be deduced by the end of the call to
|
||||
`to_array`, but we can only deduce it once it is being consumed by
|
||||
`takes_{small,big}_array` -- which is too late for the compiler.
|
||||
* We cannot overload on the return type, which means we must return a single
|
||||
type from the function.
|
||||
|
||||
The goal is to delay *when* the deduction of the array's size is happening,
|
||||
which can be done by using a *templated conversion operator*.
|
||||
|
||||
So the solution to our problem is to do the following:
|
||||
|
||||
```c++
|
||||
// Proxy returned by `to_array`: it only remembers the string, and defers the
// choice of the array size to the point where the value gets converted.
class ToArray {
    std::string_view s_;

public:
    ToArray(std::string_view s) : s_(s) {}

    // Templated conversion operator: `N` is deduced from the `std::array` type
    // that the call site requires (e.g. the parameter of the called function).
    // NOTE: there is no bounds check, `s_` must hold at most `N` characters,
    // and the elements past the end of the string are left uninitialized.
    template <size_t N>
    operator std::array<char, N>() const {
        std::array<char, N> ret;
        std::copy(s_.begin(), s_.end(), ret.begin());
        return ret;
    }
};  // A class definition must be terminated by a semicolon

// Entry point: returns the proxy instead of a concrete array type.
ToArray to_array(std::string_view s) {
    return ToArray{s};
}
|
||||
```
|
||||
|
||||
The following steps happen when trying to call `takes_small_array(to_array(s))`:
|
||||
|
||||
* `to_array(s)` returns a `ToArray` value.
|
||||
* the `ToArray` value is not an `array<char, 32>`, but has an implicit
|
||||
conversion operator, which the compiler invokes.
|
||||
* `takes_small_array` is called with the converted `array<char, 32>` value.
|
||||
|
||||
We now have a "magic" function which can convert a `string_view` to an
|
||||
`std::array` of characters of any size. We could further improve this by
|
||||
ensuring that the array is terminated with a `'\0'`, throwing an exception when
|
||||
the array is too small for the given string, etc... This is left as an exercise
|
||||
to the reader.
|
|
@ -1,329 +0,0 @@
|
|||
---
|
||||
title: "Multiple Dispatch in C++"
|
||||
date: 2022-11-02T16:36:53+01:00
|
||||
draft: false # I don't care for draft mode, git has branches for that
|
||||
description: "A Lisp super-power in C++"
|
||||
tags:
|
||||
- c++
|
||||
- design-pattern
|
||||
categories:
|
||||
- programming
|
||||
series:
|
||||
favorite: false
|
||||
disable_feed: false
|
||||
---
|
||||
|
||||
A great feature that can be used in more dynamic languages is *multiple
|
||||
dispatch*. Here's an example in [Julia][julia-lang] taken from the [Wikipedia
|
||||
article][wiki-multiple-dispatch].
|
||||
|
||||
```julia
|
||||
abstract type SpaceObject end
|
||||
|
||||
struct Asteroid <: SpaceObject
|
||||
# Asteroid fields
|
||||
end
|
||||
struct Spaceship <: SpaceObject
|
||||
# Spaceship fields
|
||||
end
|
||||
|
||||
collide_with(::Asteroid, ::Spaceship) = # Asteroid/Spaceship collision
|
||||
collide_with(::Spaceship, ::Asteroid) = # Spaceship/Asteroid collision
|
||||
collide_with(::Spaceship, ::Spaceship) = # Spaceship/Spaceship collision
|
||||
collide_with(::Asteroid, ::Asteroid) = # Asteroid/Asteroid collision
|
||||
|
||||
collide(x::SpaceObject, y::SpaceObject) = collide_with(x, y)
|
||||
```
|
||||
|
||||
The `collide` function calls `collide_with` which, at runtime, will inspect the
|
||||
types of its arguments and *dispatch* to the appropriate implementation.
|
||||
|
||||
Julia was created with multiple dispatch as a first-class citizen; it is used
|
||||
liberally in its ecosystem. C++ does not have access to such a feature natively,
|
||||
but there are alternatives that I will be presenting in this article, and try to
|
||||
justify their uses and limitations.
|
||||
|
||||
[julia-lang]: https://julialang.org/
|
||||
[wiki-multiple-dispatch]: https://en.wikipedia.org/wiki/Multiple_dispatch
|
||||
<!--more-->
|
||||
|
||||
## Single dispatch
|
||||
|
||||
The native way to perform dynamic dispatch in C++ is through the
|
||||
use of *virtual methods*, which allows an object to *override* the behaviour of
|
||||
one of its super-classes' methods.
|
||||
|
||||
Invoking a virtual method will perform *single dispatch*, on the dynamic type
|
||||
of the object whose method is being called.
|
||||
|
||||
Here is an example:
|
||||
|
||||
```cpp
|
||||
struct SpaceObject {
|
||||
virtual ~SpaceObject() = default;
|
||||
|
||||
// Pure virtual method, which must be overridden by non-abstract sub-classes
|
||||
virtual void impact() = 0;
|
||||
};
|
||||
|
||||
struct Asteroid : SpaceObject {
|
||||
// Override the method for asteroid impacts
|
||||
void impact() override {
|
||||
std::cout << "Bang!\n";
|
||||
}
|
||||
};
|
||||
|
||||
struct Spaceship : SpaceObject {
|
||||
// Override the method for spaceship impacts
|
||||
void impact() override {
|
||||
std::cout << "Crash!\n";
|
||||
}
|
||||
};
|
||||
|
||||
int main() {
|
||||
std::unique_ptr<SpaceObject> object = std::make_unique<Spaceship>();
|
||||
object->impact(); // Prints "Crash!"
|
||||
|
||||
object = std::make_unique<Asteroid>();
|
||||
object->impact(); // Prints "Bang!"
|
||||
}
|
||||
```
|
||||
|
||||
Virtual methods are great when you want to represent a common set of behaviour
|
||||
(an *interface*), and be able to substitute various types with their specific
|
||||
implementation.
|
||||
|
||||
For example, a dummy file-system interface might look like the following:
|
||||
|
||||
```cpp
|
||||
struct Filesystem {
|
||||
virtual void write(std::string_view filename, std::span<char> data) = 0;
|
||||
virtual std::vector<char> read(std::string_view filename) = 0;
|
||||
virtual void delete(std::string_view filename) = 0;
|
||||
};
|
||||
```
|
||||
|
||||
You can then write `PosixFilesystem` which makes use of the POSIX API and
|
||||
interacts with actual on-disk data, `MockFilesystem` which only works in-memory
|
||||
and can be used for testing, etc...
|
||||
|
||||
## Double dispatch through the Visitor pattern
|
||||
|
||||
Sometimes single dispatch is not enough, such as in the collision example at the
|
||||
beginning of this article. In cases where a computation depends on the dynamic
|
||||
type of *two* of its values, we can make use of double-dispatch by leveraging
|
||||
the Visitor design pattern. This is done by calling a virtual method on the
|
||||
first value, which itself will call a virtual method on the second value.
|
||||
|
||||
Here's a commented example:
|
||||
|
||||
```cpp
|
||||
struct Asteroid;
|
||||
struct Spaceship;
|
||||
|
||||
struct SpaceObject {
|
||||
virtual ~SpaceObject() = default;
|
||||
|
||||
// Only used to kick-start the double-dispatch process
|
||||
virtual void collide_with(SpaceObject& other) = 0;
|
||||
|
||||
// The actual dispatching methods
|
||||
virtual void collide_with(Asteroid& other) = 0;
|
||||
virtual void collide_with(Spaceship& other) = 0;
|
||||
};
|
||||
|
||||
struct Asteroid : SpaceObject {
|
||||
void collide_with(SpaceObject& other) override {
|
||||
// `*this` is an `Asteroid&` which kick-starts the double-dispatch
|
||||
other.collide_with(*this);
|
||||
};
|
||||
|
||||
void collide_with(Asteroid& other) override { /* Asteroid/Asteroid */ };
|
||||
void collide_with(Spaceship& other) override { /* Asteroid/Spaceship */ };
|
||||
};
|
||||
|
||||
struct Spaceship : SpaceObject {
|
||||
void collide_with(SpaceObject& other) override {
|
||||
// `*this` is a `Spaceship&` which kick-starts the double-dispatch
|
||||
other.collide_with(*this);
|
||||
};
|
||||
|
||||
void collide_with(Asteroid& other) override { /* Spaceship/Asteroid */ };
|
||||
void collide_with(Spaceship& other) override { /* Spaceship/Spaceship */ };
|
||||
};
|
||||
|
||||
void collide(SpaceObject& first, SpaceObject& second) {
|
||||
first.collide_with(second);
|
||||
};
|
||||
|
||||
int main() {
|
||||
auto asteroid = std::make_unique<Asteroid>();
|
||||
auto spaceship = std::make_unique<Spaceship>();
|
||||
|
||||
collide(*asteroid, *spaceship);
|
||||
// Calls in order:
|
||||
// - Asteroid::collide_with(SpaceObject&)
|
||||
// - Spaceship::collide_with(Asteroid&)
|
||||
|
||||
collide(*spaceship, *asteroid);
|
||||
// Calls in order:
|
||||
// - Spaceship::collide_with(SpaceObject&)
|
||||
// - Asteroid::collide_with(Spaceship&)
|
||||
|
||||
asteroid->collide_with(*spaceship);
|
||||
// Only calls Asteroid::collide_with(Spaceship&)
|
||||
|
||||
spaceship->collide_with(*asteroid);
|
||||
// Only calls Spaceship::collide_with(Asteroid&)
|
||||
}
|
||||
```
|
||||
|
||||
Double dispatch is most commonly used with the *visitor pattern*, in
|
||||
which a closed class hierarchy (the data) is separated from an open class
|
||||
hierarchy (the algorithms acting on that data). This is especially useful in
|
||||
e.g: compilers, where the AST class hierarchy represents the data *only*, and
|
||||
all compiler stages and optimization passes are programmed by a series of
|
||||
visitors.
|
||||
|
||||
One downside of this approach is that if you want to add `SpaceStation` as
|
||||
a sub-class of `SpaceObject`, and handle its collisions with other
|
||||
`SpaceObject`s, you need to:
|
||||
|
||||
* Implement all `collide_with` methods for this new class.
|
||||
* Add a new virtual method `collide_with(SpaceStation&)` and implement it on
|
||||
every sub-class.
|
||||
|
||||
This can be inconvenient if your class hierarchy changes often.
|
||||
|
||||
## Multiple dispatch on a closed class hierarchy
|
||||
|
||||
When even double dispatch is not enough, there is a way to do multiple dispatch
|
||||
in standard C++, included in the STL since C++17. However unlike the previous
|
||||
methods I showed, this one relies on using [`std::variant`][variant-cppref] and
|
||||
[`std::visit`][visit-cppref].
|
||||
|
||||
[variant-cppref]: https://en.cppreference.com/w/cpp/utility/variant
|
||||
[visit-cppref]: https://en.cppreference.com/w/cpp/utility/variant/visit
|
||||
|
||||
The limitation of `std::variant` is that you are limited to the types you can
|
||||
select at *compile-time* for the values used during your dispatch operation.
|
||||
You have a *closed* hierarchy of classes, which is the explicit list of types in
|
||||
your `variant`.
|
||||
|
||||
Nonetheless, if you can live with that limitation, then you have a great amount
|
||||
of power available to you. I have used `std::visit` in the past to mimic the
|
||||
effect of pattern matching.
|
||||
|
||||
In this example, I re-create the double-dispatch from the previous section:
|
||||
|
||||
```cpp
|
||||
// No need to inherit from a `SpaceObject` base class
|
||||
struct Asteroid {};
|
||||
struct Spaceship {};
|
||||
|
||||
// But the list of possible runtime types *must* be enumerated at compile-time
|
||||
using SpaceObject = std::variant<Asteroid, Spaceship>;
|
||||
|
||||
void collide(SpaceObject& first, SpaceObject& second) {
|
||||
struct CollideDispatch {
|
||||
void operator()(Asteroid& first, Asteroid& second) {
|
||||
// Asteroid/Asteroid
|
||||
}
|
||||
void operator()(Asteroid& first, Spaceship& second) {
|
||||
// Asteroid/Spaceship
|
||||
}
|
||||
void operator()(Spaceship& first, Asteroid& second) {
|
||||
// Spaceship/Asteroid
|
||||
}
|
||||
void operator()(Spaceship& first, Spaceship& second) {
|
||||
// Spaceship/Spaceship
|
||||
}
|
||||
};
|
||||
|
||||
std::visit(CollideDispatch(), first, second);
|
||||
}
|
||||
|
||||
int main() {
|
||||
SpaceObject asteroid = Asteroid();
|
||||
SpaceObject spaceship = Spaceship();
|
||||
|
||||
collide(asteroid, spaceship);
|
||||
// Calls CollideDispatch::operator()(Asteroid&, Spaceship&)
|
||||
|
||||
collide(spaceship, asteroid);
|
||||
// Calls CollideDispatch::operator()(Spaceship&, Asteroid&)
|
||||
}
|
||||
```
|
||||
|
||||
Obviously, the issue with adding a new `SpaceStation` variant is once again
|
||||
apparent in this implementation. You will get a compile error unless you handle
|
||||
this new `SpaceStation` variant at every point you `visit` the `SpaceObject`s.
|
||||
|
||||
## The Expression Problem
|
||||
|
||||
One issue we have not been able to move past in these examples is the
|
||||
[Expression Problem][expression-problem]. In short, this means that we can't
|
||||
add a new data type (e.g: `SpaceStation`), or a new operation (e.g: `land_on`)
|
||||
to our current code without re-compiling it.
|
||||
|
||||
[expression-problem]: https://en.wikipedia.org/wiki/Expression_problem
|
||||
|
||||
This is the downside I was pointing out in our previous sections:
|
||||
|
||||
* Data type extension: one can easily add a new `SpaceObject` child-class in the
|
||||
OOP version, but needs to modify each implementation if we want to add a new
|
||||
method to the `SpaceObject` interface to implement a new operation.
|
||||
* Operation extension: one can easily create a new function when using the
|
||||
`std::variant` based representation, as pattern-matching easily allows us to
|
||||
only handle the kinds of values we are interested in. But adding a new
|
||||
`SpaceObject` variant means we need to modify and re-compile every
|
||||
`std::visit` call to handle the new variant.
|
||||
|
||||
There is currently no (good) way in standard C++ to tackle the Expression
|
||||
Problem. A paper ([N2216][N2216]) was written to propose a new language feature
|
||||
to improve the situation. However it looks quite complex, and never got followed
|
||||
up on for standardization.
|
||||
|
||||
[N2216]: https://open-std.org/jtc1/sc22/wg21/docs/papers/2007/n2216.pdf
|
||||
|
||||
In the meantime, one can find some libraries (like [`yomm2`][yomm2]) that
|
||||
reduce the amount of boiler-plate needed to emulate this feature.
|
||||
|
||||
[yomm2]: https://github.com/jll63/yomm2
|
||||
|
||||
```cpp
|
||||
#include <yorel/yomm2/keywords.hpp>
|
||||
|
||||
struct SpaceObject {
|
||||
virtual ~SpaceObject() = default;
|
||||
};
|
||||
|
||||
struct Asteroid : SpaceObject { /* fields, methods, etc... */ };
|
||||
|
||||
struct Spaceship : SpaceObject { /* fields, methods, etc... */ };
|
||||
|
||||
// Register all sub-classes of `SpaceObject` for use with open methods
|
||||
register_classes(SpaceObject, Asteroid, Spaceship);
|
||||
|
||||
// Register the `collide` open method, which dispatches on two arguments
|
||||
declare_method(void, collide, (virtual_<SpaceObject&>, virtual_<SpaceObject&>));
|
||||
|
||||
// Write the different implementations of `collide`
|
||||
define_method(void, collide, (Asteroid& left, Asteroid& right)) { /* work */ }
|
||||
define_method(void, collide, (Asteroid& left, Spaceship& right)) { /* work */ }
|
||||
define_method(void, collide, (Spaceship& left, Asteroid& right)) { /* work */ }
|
||||
define_method(void, collide, (Spaceship& left, Spaceship& right)) { /* work */ }
|
||||
|
||||
|
||||
int main() {
|
||||
yorel::yomm2::update_methods();
|
||||
|
||||
auto asteroid = std::make_unique<Asteroid>();
|
||||
auto spaceship = std::make_unique<Spaceship>();
|
||||
|
||||
collide(*asteroid, *spaceship); // Calls (Asteroid, Spaceship) version
|
||||
collide(*spaceship, *asteroid); // Calls (Spaceship, Asteroid) version
|
||||
collide(*asteroid, *asteroid); // Calls (Asteroid, Asteroid) version
|
||||
collide(*spaceship, *spaceship); // Calls (Spaceship, Spaceship) version
|
||||
}
|
||||
```
|
|
@ -1,157 +0,0 @@
|
|||
---
|
||||
title: "Union Find"
|
||||
date: 2024-06-24T21:07:49+01:00
|
||||
draft: false # I don't care for draft mode, git has branches for that
|
||||
description: "My favorite data structure"
|
||||
tags:
|
||||
- algorithms
|
||||
- data structures
|
||||
- python
|
||||
categories:
|
||||
- programming
|
||||
series:
|
||||
- Cool algorithms
|
||||
favorite: false
|
||||
disable_feed: false
|
||||
---
|
||||
|
||||
To kick off the [series]({{< ref "/series/cool-algorithms/" >}}) of posts about
|
||||
algorithms and data structures I find interesting, I will be talking about my
|
||||
favorite one: the [_Disjoint Set_][wiki]. Also known as the _Union-Find_ data
|
||||
structure, so named because of its two main operations: `ds.union(lhs, rhs)` and
|
||||
`ds.find(elem)`.
|
||||
|
||||
[wiki]: https://en.wikipedia.org/wiki/Disjoint-set_data_structure
|
||||
|
||||
<!--more-->
|
||||
|
||||
## What does it do?
|
||||
|
||||
The _Union-Find_ data structure allows one to store a collection of sets of
|
||||
elements, with operations for adding new sets, merging two sets into one, and
|
||||
finding the representative member of a set. Not only does it do all that, but it
|
||||
does it in almost constant (amortized) time!
|
||||
|
||||
Here is a small motivating example for using the _Disjoint Set_ data structure:
|
||||
|
||||
```python
|
||||
def connected_components(graph: Graph) -> list[set[Node]]:
    """Group the nodes of ``graph`` into its connected components."""
    # Start with every node alone in its own set
    forest: DisjointSet[Node] = DisjointSet(graph.nodes)
    # Both ends of an edge are connected: merge the sets they belong to
    for edge in graph.edges:
        start, dest = edge
        forest.union(start, dest)
    # Nodes of one component all resolve to the same (arbitrary) root
    by_root: dict[Node, set[Node]] = defaultdict(set)
    for node in graph.nodes:
        root = forest.find(node)
        by_root[root].add(node)
    # One set of nodes per connected component
    return [*by_root.values()]
|
||||
```
|
||||
|
||||
## Implementation
|
||||
|
||||
I will show how to implement `UnionFind` for integers, though it can easily be
|
||||
extended to be used with arbitrary types (e.g: by mapping each element
|
||||
one-to-one to a distinct integer, or using a different set representation).
|
||||
|
||||
### Representation
|
||||
|
||||
Creating a new disjoint set is easy enough:
|
||||
|
||||
```python
|
||||
class UnionFind:
    """Disjoint-set forest over the integers ``0 .. size - 1``."""

    # _parent[i] is the parent of element i; a root is its own parent.
    _parent: list[int]
    # _rank[i] is the rank of element i; every element starts at rank 0.
    _rank: list[int]

    def __init__(self, size: int):
        """Create ``size`` singleton sets, one per element."""
        # Each node is in its own set, making it its own parent...
        # (the field is `_parent`, singular, as read by `find` and `union`)
        self._parent = list(range(size))
        # ... And its rank 0
        self._rank = [0] * size
|
||||
```
|
||||
|
||||
We represent each set through the `_parent` field: each element of the set is
|
||||
linked to its parent, until the root node which is its own parent. When first
|
||||
initializing the structure, each element is in its own set, so we initialize
|
||||
each element to be a root and make it its own parent (`_parent[i] == i` for all
|
||||
`i`).
|
||||
|
||||
The `_rank` field is an optimization which we will touch on in a later section.
|
||||
|
||||
### Find
|
||||
|
||||
A naive implementation of `find(...)` is simple enough to write:
|
||||
|
||||
```python
|
||||
def find(self, elem: int) -> int:
    """Return the root of ``elem``'s set by walking up the parent links."""
    parent = self._parent[elem]
    # Keep recursing on the parent until we reach a node that is its own parent
    if parent != elem:
        return self.find(parent)
    # `elem` is its own parent: it is the root of the tree
    return elem
|
||||
```
|
||||
|
||||
However, going back up the chain of parents each time we want to find the root
|
||||
node (an `O(n)` operation) would make for disastrous performance. Instead we can
|
||||
do a small optimization called _path splitting_.
|
||||
|
||||
```python
|
||||
def find(self, elem: int) -> int:
    """Return the root of the set containing ``elem``, splitting the path.

    Every element visited on the way up is re-linked to its grand-parent.
    """
    while (parent := self._parent[elem]) != elem:
        # Replace each parent link by a link to the grand-parent.
        # Assignment targets are bound left to right, so `_parent[elem]` must
        # be written *before* `elem` itself is advanced to `parent`.
        self._parent[elem], elem = self._parent[parent], parent
    return elem
|
||||
```
|
||||
|
||||
This flattens the chain so that each node links more directly to the root (the
|
||||
length is reduced by half), making each subsequent `find(...)` faster.
|
||||
|
||||
Other compression schemes exist, along the spectrum between shortening
|
||||
the chain faster earlier, or updating `_parent` fewer times per `find(...)`.
|
||||
|
||||
### Union
|
||||
|
||||
A naive implementation of `union(...)` is simple enough to write:
|
||||
|
||||
```python
|
||||
def union(self, lhs: int, rhs: int) -> int:
    """Merge the sets of ``lhs`` and ``rhs`` and return the resulting root."""
    # Resolve both elements to the root of their respective set
    root, other = self.find(lhs), self.find(rhs)
    # Arbitrarily hang the second root under the first one
    self._parent[other] = root
    # The first root now represents the merged set
    return root
|
||||
```
|
||||
|
||||
Once again, improvements can be made. Depending on the order in which we call
|
||||
`union(...)`, we might end up creating a long chain from the leaf of the tree to
|
||||
the root node, leading to slower `find(...)` operations. If at all possible, we
|
||||
would like to keep the trees as shallow as possible.
|
||||
|
||||
To do so, we want to avoid merging taller trees into smaller ones, so as to keep
|
||||
them as balanced as possible. Since a higher tree will result in a slower
|
||||
`find(...)`, keeping the trees balanced will lead to increased performance.
|
||||
|
||||
This is where the `_rank` field we mentioned earlier comes in: the _rank_ of an
|
||||
element is an upper bound on its height in the tree. By keeping track of this
|
||||
_approximate_ height, we can keep the trees balanced when merging them.
|
||||
|
||||
```python
|
||||
def union(self, lhs: int, rhs: int) -> int:
    """Merge the sets containing `lhs` and `rhs` using union by rank.

    Returns the root of the merged set. If both elements already share a
    root, nothing is modified and that root is returned.
    """
    lhs = self.find(lhs)
    rhs = self.find(rhs)
    # Bail out early if they already belong to the same set
    if lhs == rhs:
        return lhs
    # Always keep `lhs` as the taller tree
    if self._rank[lhs] < self._rank[rhs]:
        lhs, rhs = rhs, lhs
    # Merge the smaller tree into the taller one
    self._parent[rhs] = lhs
    # Only merging two trees of equal rank can make the result taller
    if self._rank[lhs] == self._rank[rhs]:
        self._rank[lhs] += 1
    return lhs
|
||||
```
|
|
@ -1,171 +0,0 @@
|
|||
---
|
||||
title: "Trie"
|
||||
date: 2024-06-30T11:07:49+01:00
|
||||
draft: false # I don't care for draft mode, git has branches for that
|
||||
description: "A cool map"
|
||||
tags:
|
||||
- algorithms
|
||||
- data structures
|
||||
- python
|
||||
categories:
|
||||
- programming
|
||||
series:
|
||||
- Cool algorithms
|
||||
favorite: false
|
||||
disable_feed: false
|
||||
---
|
||||
|
||||
This time, let's talk about the [_Trie_][wiki], which is a tree-based mapping
|
||||
structure most often used for string keys.
|
||||
|
||||
[wiki]: https://en.wikipedia.org/wiki/Trie
|
||||
|
||||
<!--more-->
|
||||
|
||||
## What does it do?
|
||||
|
||||
A _Trie_ can be used to map a set of string keys to their corresponding values,
|
||||
without the need for a hash function. This also means you won't suffer from hash
|
||||
collisions, though the tree-based structure will probably translate to slower
|
||||
performance than a good hash table.
|
||||
|
||||
A _Trie_ is especially useful to represent a dictionary of words in the case of
|
||||
spell correction, as it can easily be used to fuzzy match words under a given
|
||||
edit distance (think [Levenshtein distance]).
|
||||
|
||||
[Levenshtein distance]: https://en.wikipedia.org/wiki/Levenshtein_distance
|
||||
|
||||
## Implementation
|
||||
|
||||
This implementation will be in Python for exposition purposes, even though
|
||||
it already has a built-in `dict`.
|
||||
|
||||
### Representation
|
||||
|
||||
Creating a new `Trie` is easy: the root node starts off empty and without any
|
||||
mapped values.
|
||||
|
||||
```python
|
||||
class Trie[T]:
|
||||
_children: dict[str, Trie[T]]
|
||||
_value: T | None
|
||||
|
||||
def __init__(self):
|
||||
# Each letter is mapped to a Trie
|
||||
self._children = defaultdict(Trie)
|
||||
# If we match a full string, we store the mapped value
|
||||
self._value = None
|
||||
```
|
||||
|
||||
We're using a `defaultdict` for the children for ease of implementation in this
|
||||
post. In reality, I would encourage you to exit early when you can't match a given
|
||||
character.
|
||||
|
||||
The string key will be implicit by the position of a node in the tree: the empty
|
||||
string at the root, one-character strings as its direct children, etc...
|
||||
|
||||
### Search
|
||||
|
||||
An exact match look-up is easily done: we go down the tree until we've exhausted
|
||||
the key. At that point we've either found a mapped value or not.
|
||||
|
||||
```python
|
||||
def get(self, key: str) -> T | None:
    """Return the value mapped to `key`, or `None` if there is none."""
    node = self
    # Walk down one child per character of the key
    for letter in key:
        node = node._children[letter]
    # The whole key has been matched: return the `T` if mapped, `None` otherwise
    return node._value
|
||||
```
|
||||
|
||||
### Insertion
|
||||
|
||||
Adding a new value to the _Trie_ is similar to a key lookup, only this time we
|
||||
store the new value instead of returning it.
|
||||
|
||||
```python
|
||||
def insert(self, key: str, value: T) -> bool:
    """Map `key` to `value`.

    Returns `True` if a previous mapping for `key` was overwritten, `False`
    if the key was not mapped before.
    """
    # Have we matched the full key?
    if not key:
        # Check whether we're overwriting a previous mapping
        was_mapped = self._value is not None
        # Store the corresponding value
        self._value = value
        # Return whether we've performed an overwrite
        return was_mapped
    # Otherwise, recurse on the child corresponding to the first letter
    return self._children[key[0]].insert(key[1:], value)
|
||||
```
|
||||
|
||||
### Removal
|
||||
|
||||
Removal should also look familiar.
|
||||
|
||||
```python
|
||||
def remove(self, key: str) -> bool:
    """Remove the mapping for `key`.

    Returns `True` if the key was mapped, `False` otherwise.
    """
    # Have we matched the full key?
    if not key:
        # A node holds a mapping exactly when its value is not `None`
        was_mapped = self._value is not None
        # Remove the value
        self._value = None
        # Return whether it was mapped
        return was_mapped
    # Otherwise, recurse on the child corresponding to the first letter
    return self._children[key[0]].remove(key[1:])
|
||||
```
|
||||
|
||||
### Fuzzy matching
|
||||
|
||||
Fuzzily matching a given word is where the real difficulty is: the key is to
|
||||
realize we can use the prefix-tree nature of a _Trie_ to avoid doing wasteful
|
||||
work.
|
||||
|
||||
By leveraging the prefix visit order of the tree, we can build an iterative
|
||||
Levenshtein distance matrix, in much the same way one would do so in its
|
||||
[Dynamic Programming] implementation (see the [Wagner-Fisher algorithm]).
|
||||
|
||||
[Dynamic Programming]: https://en.wikipedia.org/wiki/Dynamic_programming
|
||||
[Wagner-Fisher algorithm]: https://en.wikipedia.org/wiki/Wagner%E2%80%93Fischer_algorithm
|
||||
|
||||
```python
|
||||
class FuzzyResult[T](NamedTuple):
|
||||
distance: int
|
||||
key: str
|
||||
value: T
|
||||
|
||||
|
||||
def get_fuzzy(self, key: str, max_distance: int = 0) -> Iterator[FuzzyResult[T]]:
|
||||
def helper(
|
||||
current_word: str,
|
||||
node: Trie[T],
|
||||
previous_row: list[int],
|
||||
) -> Iterator[tuple[int, T]]:
|
||||
# Iterative Levenshtein
|
||||
current_row = [previous_row[0] + 1]
|
||||
current_char = current_word[-1]
|
||||
for column, key_char in enumerate(key, start=1):
|
||||
insertion = current_row[column - 1] + 1
|
||||
deletion = previous_row[column] + 1
|
||||
replacement = previous_row[column - 1] + (key_char != current_char)
|
||||
current_row.append(min(insertion, deletion, replacement))
|
||||
|
||||
# If we are under the max distance, match this node
|
||||
if (distance := current_row[-1]) <= max_distance and node._value != None:
|
||||
# Only if it has a value of course
|
||||
yield FuzzyResult(distance, current_word, node._value)
|
||||
|
||||
# If we can potentially still match children, recurse
|
||||
if min(current_row) <= max_distance:
|
||||
for c, child in node._children.items():
|
||||
yield from helper(current_word + c, child, current_row)
|
||||
|
||||
# Build the first row -- the edit distance from the empty string
|
||||
row = list(range(len(key) + 1))
|
||||
|
||||
# Base case for the empty string
|
||||
if (distance := row[-1]) <= max_distance and self._value != None:
|
||||
yield FuzzyResult(distance, "", self._value)
|
||||
for c, child in self._children.items():
|
||||
yield from helper(c, child, row)
|
||||
```
|
|
@ -1,191 +0,0 @@
|
|||
---
|
||||
title: "Gap Buffer"
|
||||
date: 2024-07-06T21:27:19+01:00
|
||||
draft: false # I don't care for draft mode, git has branches for that
|
||||
description: "As featured in GNU Emacs"
|
||||
tags:
|
||||
- algorithms
|
||||
- data structures
|
||||
- python
|
||||
categories:
|
||||
- programming
|
||||
series:
|
||||
- Cool algorithms
|
||||
favorite: false
|
||||
disable_feed: false
|
||||
---
|
||||
|
||||
The [_Gap Buffer_][wiki] is a popular data structure for text editors to
|
||||
represent files and editable buffers. The most famous of them probably being
|
||||
[GNU Emacs][emacs].
|
||||
|
||||
[wiki]: https://en.wikipedia.org/wiki/Gap_buffer
|
||||
[emacs]: https://www.gnu.org/software/emacs/manual/html_node/elisp/Buffer-Gap.html
|
||||
|
||||
<!--more-->
|
||||
|
||||
## What does it do?
|
||||
|
||||
A _Gap Buffer_ is simply a list of characters, similar to a normal string, with
|
||||
the added twist of splitting it into two sides: the prefix and suffix, on either
|
||||
side of the cursor. In between them, a gap is left to allow for quick
|
||||
insertion at the cursor.
|
||||
|
||||
Moving the cursor moves the gap around the buffer, the prefix and suffix getting
|
||||
shorter/longer as required.
|
||||
|
||||
## Implementation
|
||||
|
||||
I'll be writing a sample implementation in Python, as with the rest of the
|
||||
[series]({{< ref "/series/cool-algorithms/" >}}). I don't think it showcases the
|
||||
elegance of the _Gap Buffer_ in action like a C implementation full of
|
||||
`memmove`s would, but it does make it short and sweet.
|
||||
|
||||
### Representation
|
||||
|
||||
We'll be representing the gap buffer as an actual list of characters.
|
||||
|
||||
Given that Python doesn't _have_ characters, let's settle for a list of strings,
|
||||
each representing a single character...
|
||||
|
||||
```python
|
||||
# Python has no character type: a `Char` is a one-character string
Char = str


class GapBuffer:
    """Editable text stored as a prefix and a suffix around a gap at the cursor."""

    # List of characters, contains prefix and suffix of string with gap in the middle
    _buf: list[Char]
    # The gap is contained between [start, end) (i.e: buf[start:end])
    _gap_start: int
    _gap_end: int

    # Visual representation of the gap buffer:
    # This is a very [                      ]long string.
    # |<----------------------------------------------->| capacity
    # |<------------>|                       |<-------->| string
    #                |<------------------->|              gap
    # |<------------>|                                    prefix
    #                                        |<-------->| suffix
    def __init__(self, initial_capacity: int = 16) -> None:
        assert initial_capacity > 0
        # An empty buffer is nothing but gap: it spans the whole capacity
        self._gap_start, self._gap_end = 0, initial_capacity
        self._buf = ["" for _ in range(initial_capacity)]
|
||||
```
|
||||
|
||||
### Accessors
|
||||
|
||||
I'm mostly adding these for exposition, and making it easier to write `assert`s
|
||||
later.
|
||||
|
||||
```python
|
||||
@property
def capacity(self) -> int:
    """Total number of slots in the buffer, gap included."""
    return len(self._buf)


@property
def gap_length(self) -> int:
    """Number of slots in the gap."""
    start, end = self._gap_start, self._gap_end
    return end - start


@property
def string_length(self) -> int:
    """Number of characters stored, i.e. every slot outside the gap."""
    return self.capacity - self.gap_length


@property
def prefix_length(self) -> int:
    """Number of characters before the gap."""
    return self._gap_start


@property
def suffix_length(self) -> int:
    """Number of characters after the gap."""
    total = self.capacity
    return total - self._gap_end
|
||||
```
|
||||
|
||||
### Growing the buffer
|
||||
|
||||
I've written this method in a somewhat non-idiomatic manner, to make it closer
|
||||
to how it would look in C using `realloc` instead.
|
||||
|
||||
It would be more efficient to use slicing to insert the needed extra capacity
|
||||
directly, instead of making a new buffer and copying characters over.
|
||||
|
||||
```python
|
||||
def grow(self, capacity: int) -> None:
    """Grow the buffer to `capacity` slots, giving the extra room to the gap."""
    assert capacity >= self.capacity
    added_capacity = capacity - len(self._buf)
    # Take the prefix/suffix out of the old buffer
    prefix = self._buf[: self._gap_start]
    suffix = self._buf[self._gap_end :]
    # The new gap fills whatever the prefix and suffix leave free
    gap = [""] * (capacity - len(prefix) - len(suffix))
    # Use the new buffer, account for added capacity
    self._buf = prefix + gap + suffix
    self._gap_end += added_capacity
|
||||
```
|
||||
|
||||
### Insertion
|
||||
|
||||
Inserting text at the cursor's position means filling up the gap in the middle
|
||||
of the buffer. To do so we must first make sure that the gap is big enough, or
|
||||
grow the buffer accordingly.
|
||||
|
||||
Then inserting the text is simply a matter of copying its characters in place,
|
||||
and moving the start of the gap further right.
|
||||
|
||||
```python
|
||||
def insert(self, val: str) -> None:
    """Insert `val` at the cursor, growing the buffer first if the gap is too small."""
    needed = len(val)
    # Ensure we have enough space to insert the whole string
    if needed > self.gap_length:
        self.grow(max(self.capacity * 2, self.string_length + needed))
    # Fill the start of the gap with the given string, one character per slot
    new_gap_start = self._gap_start + needed
    self._buf[self._gap_start : new_gap_start] = val
    # The inserted text is now part of the prefix
    self._gap_start = new_gap_start
|
||||
```
|
||||
|
||||
### Deletion
|
||||
|
||||
Removing text from the buffer simply expands the gap in the corresponding
|
||||
direction, shortening the string's prefix/suffix. This makes it very cheap.
|
||||
|
||||
The methods are named after the `backspace` and `delete` keys on the keyboard.
|
||||
|
||||
```python
|
||||
def backspace(self, dist: int = 1) -> None:
    """Delete the `dist` characters just before the cursor."""
    # A negative distance would move the gap start to the right, pulling
    # stale gap slots back into the prefix
    assert 0 <= dist <= self.prefix_length
    # Extend gap to the left
    self._gap_start -= dist
|
||||
|
||||
def delete(self, dist: int = 1) -> None:
    """Delete the `dist` characters just after the cursor."""
    # A negative distance would move the gap end to the left, pulling
    # stale gap slots back into the suffix
    assert 0 <= dist <= self.suffix_length
    # Extend gap to the right
    self._gap_end += dist
|
||||
```
|
||||
|
||||
### Moving the cursor
|
||||
|
||||
Moving the cursor along the buffer will shift letters from one side of the gap
|
||||
to the other, moving them across from prefix to suffix and back.
|
||||
|
||||
I find Python's list slicing not quite as elegant to read as a `memmove`, though
|
||||
it does make for a very small and efficient implementation.
|
||||
|
||||
```python
|
||||
def left(self, dist: int = 1) -> None:
    """Move the cursor `dist` characters to the left."""
    # Reject negative distances too: they would shift the indices the wrong way
    assert 0 <= dist <= self.prefix_length
    # Shift the needed number of characters from end of prefix to start of suffix.
    # The right-hand slice is a copy, so overlapping ranges are safe.
    self._buf[self._gap_end - dist : self._gap_end] = self._buf[
        self._gap_start - dist : self._gap_start
    ]
    # Adjust indices accordingly
    self._gap_start -= dist
    self._gap_end -= dist
|
||||
|
||||
def right(self, dist: int = 1) -> None:
    """Move the cursor `dist` characters to the right."""
    # Reject negative distances too: they would shift the indices the wrong way
    assert 0 <= dist <= self.suffix_length
    # Shift the needed number of characters from start of suffix to end of prefix.
    # The right-hand slice is a copy, so overlapping ranges are safe.
    self._buf[self._gap_start : self._gap_start + dist] = self._buf[
        self._gap_end : self._gap_end + dist
    ]
    # Adjust indices accordingly
    self._gap_start += dist
    self._gap_end += dist
|
||||
```
|
|
@ -1,97 +0,0 @@
|
|||
---
|
||||
title: "Bloom Filter"
|
||||
date: 2024-07-14T17:46:40+01:00
|
||||
draft: false # I don't care for draft mode, git has branches for that
|
||||
description: "Probably cool"
|
||||
tags:
|
||||
- algorithms
|
||||
- data structures
|
||||
- python
|
||||
categories:
|
||||
- programming
|
||||
series:
|
||||
- Cool algorithms
|
||||
favorite: false
|
||||
disable_feed: false
|
||||
---
|
||||
|
||||
The [_Bloom Filter_][wiki] is a probabilistic data structure for set membership.
|
||||
|
||||
The filter can be used as an inexpensive first step when querying the actual
|
||||
data is quite costly (e.g: as a first check for expensive cache lookups or large
|
||||
data seeks).
|
||||
|
||||
[wiki]: https://en.wikipedia.org/wiki/Bloom_filter
|
||||
|
||||
<!--more-->
|
||||
|
||||
## What does it do?
|
||||
|
||||
A _Bloom Filter_ can be understood as a hash-set which can either tell you:
|
||||
|
||||
* An element is _not_ part of the set.
|
||||
* An element _may be_ part of the set.
|
||||
|
||||
More specifically, one can tweak the parameters of the filter to make it so that
|
||||
the _false positive_ rate of membership is quite low.
|
||||
|
||||
I won't be going into those calculations here, but they are quite trivial to
|
||||
compute, or one can just look up appropriate values for their use case.
|
||||
|
||||
## Implementation
|
||||
|
||||
I'll be using Python, which has the nifty ability of representing bitsets
|
||||
through its built-in big integers quite easily.
|
||||
|
||||
We'll be assuming a `BIT_COUNT` of 64 here, but the implementation can easily be
|
||||
tweaked to use a different number, or even change it at construction time.
|
||||
|
||||
### Representation
|
||||
|
||||
A `BloomFilter` is just a set of bits and a list of hash functions.
|
||||
|
||||
```python
|
||||
BIT_COUNT = 64
|
||||
|
||||
class BloomFilter[T]:
|
||||
_bits: int
|
||||
_hash_functions: list[Callable[[T], int]]
|
||||
|
||||
def __init__(self, hash_functions: list[Callable[[T], int]]) -> None:
|
||||
# Filter is initially empty
|
||||
self._bits = 0
|
||||
self._hash_functions = hash_functions
|
||||
```
|
||||
|
||||
### Inserting a key
|
||||
|
||||
To add an element to the filter, we take the output from each hash function and
|
||||
use that to set a bit in the filter. This combination of bits will identify the
|
||||
element, which we can use for lookup later.
|
||||
|
||||
```python
|
||||
def insert(self, val: T) -> None:
    """Add `val` to the filter by setting one bit per hash function."""
    # Iterate over each hash
    for f in self._hash_functions:
        # Reduce the hash to a valid bit index
        n = f(val) % BIT_COUNT
        # Set the corresponding bit (the attribute is `_bits`, not `_bit`)
        self._bits |= 1 << n
|
||||
```
|
||||
|
||||
### Querying a key
|
||||
|
||||
Because the _Bloom Filter_ does not actually store its elements, but some
|
||||
derived data from hashing them, it can only definitely say if an element _does
|
||||
not_ belong to it. Otherwise, it _may_ be part of the set, and should be checked
|
||||
against the actual underlying store.
|
||||
|
||||
```python
|
||||
def may_contain(self, val: T) -> bool:
    """Return `False` if `val` is definitely absent, `True` if it may be present."""
    for f in self._hash_functions:
        n = f(val) % BIT_COUNT
        # If one of the bits is unset, the value is definitely not present
        # (the attribute is `_bits`, not `_bit`)
        if not (self._bits & (1 << n)):
            return False
    # All bits were matched, `val` is likely to be part of the set
    return True
|
||||
```
|
|
@ -1,159 +0,0 @@
|
|||
---
|
||||
title: "Treap"
|
||||
date: 2024-07-20T14:12:27+01:00
|
||||
draft: false # I don't care for draft mode, git has branches for that
|
||||
description: "A simpler BST"
|
||||
tags:
|
||||
- algorithms
|
||||
- data structures
|
||||
- python
|
||||
categories:
|
||||
- programming
|
||||
series:
|
||||
- Cool algorithms
|
||||
favorite: false
|
||||
disable_feed: false
|
||||
graphviz: true
|
||||
---
|
||||
|
||||
The [_Treap_][wiki] is a mix between a _Binary Search Tree_ and a _Heap_.
|
||||
|
||||
Like a _Binary Search Tree_, it keeps an ordered set of keys in the shape of a
|
||||
tree, allowing for binary search traversal.
|
||||
|
||||
Like a _Heap_, it associates each node with a priority, making sure that a
|
||||
parent's priority is always higher than any of its children.
|
||||
|
||||
[wiki]: https://en.wikipedia.org/wiki/Treap
|
||||
|
||||
<!--more-->
|
||||
|
||||
## What does it do?
|
||||
|
||||
By randomizing the priority value of each key at insertion time, we ensure a
|
||||
high likelihood that the tree stays _roughly_ balanced, avoiding degenerating to
|
||||
unbalanced O(N) height.
|
||||
|
||||
Here's a sample tree created by inserting integers from 0 to 250 into the tree:
|
||||
|
||||
{{< graphviz file="treap.gv" />}}
|
||||
|
||||
## Implementation
|
||||
|
||||
I'll be keeping the theme for this [series] by using Python to implement the
|
||||
_Treap_. This leads to somewhat annoying code to handle the rotation process,
|
||||
which is easier to do in C using pointers.
|
||||
|
||||
[series]: {{< ref "/series/cool-algorithms/" >}}
|
||||
|
||||
### Representation
|
||||
|
||||
Creating a new `Treap` is easy: the tree starts off empty, waiting for new nodes
|
||||
to insert.
|
||||
|
||||
Each `Node` must keep track of the `key`, the mapped `value`, and the node's
|
||||
`priority` (which is assigned randomly). Finally it must also allow for storing
|
||||
two children (`left` and `right`).
|
||||
|
||||
```python
|
||||
class Node[K, V]:
|
||||
key: K
|
||||
value: V
|
||||
priority: float
|
||||
left: Node[K, V] | None
|
||||
righg: Node[K, V] | None
|
||||
|
||||
def __init__(self, key: K, value: V):
|
||||
# Store key and value, like a normal BST node
|
||||
self.key = key
|
||||
self.value = value
|
||||
# Priority is derived randomly
|
||||
self.priority = random()
|
||||
self.left = None
|
||||
self.right = None
|
||||
|
||||
class Treap[K, V]:
|
||||
_root: Node[K, V] | None
|
||||
|
||||
def __init__(self):
|
||||
# The tree starts out empty
|
||||
self._root = None
|
||||
```
|
||||
|
||||
### Search
|
||||
|
||||
Searching the tree is the same as in any other _Binary Search Tree_.
|
||||
|
||||
```python
|
||||
def get(self, key: K) -> V | None:
    """Return the value mapped to `key`, or `None` if the key is absent."""
    node = self._root
    # The usual BST traversal
    while node is not None:
        if node.key == key:
            return node.value
        elif node.key < key:
            # Larger keys live in the right sub-tree
            node = node.right
        else:
            node = node.left
    return None
|
||||
```
|
||||
|
||||
### Insertion
|
||||
|
||||
To insert a new `key` into the tree, we identify which leaf position it should
|
||||
be inserted at. We then generate the node's priority, insert it at this
|
||||
position, and rotate the node upwards until the heap property is respected.
|
||||
|
||||
```python
|
||||
type ChildField = Literal["left, right"]
|
||||
|
||||
def insert(self, key: K, value: V) -> bool:
    """Map `key` to `value`.

    Returns `True` if an existing key had its value overwritten, `False` if a
    new node was inserted.
    """
    # Empty treap base-case: the new node becomes the root, nothing overwritten
    if self._root is None:
        self._root = Node(key, value)
        return False
    # Path from the root to the insertion point, needed for the rotations
    path = []
    current = self._root
    while True:
        # Pre-existing key: overwrite its value in place
        if current.key == key:
            current.value = value
            return True
        # Go down the tree, recording which side was taken at each node
        side = "left" if key < current.key else "right"
        path.append((current, side))
        below = getattr(current, side)
        if below is None:
            break
        current = below
    # Key wasn't found: attach a new leaf under the last visited node
    fresh = Node(key, value)
    setattr(current, side, fresh)
    # Rotate the new node up until we respect the decreasing priority property
    self._rotate_up(fresh, path)
    # Signal that we inserted a new node
    return False
|
||||
|
||||
def _rotate_up(
|
||||
self,
|
||||
node: Node[K, V],
|
||||
parents: list[tuple[Node[K, V], ChildField]],
|
||||
) -> None:
|
||||
while parents:
|
||||
parent, field = parents.pop()
|
||||
# If the parent has higher priority, we're done rotating
|
||||
if parent.priority >= node.priority:
|
||||
break
|
||||
# Check for grand-parent/root of tree edge-case
|
||||
if parents:
|
||||
# Update grand-parent to point to the new rotated node
|
||||
grand_parent, field = parents[-1]
|
||||
setattr(grand_parent, field, node)
|
||||
else:
|
||||
# Point the root to the new rotated node
|
||||
self._root = node
|
||||
other_field = "left" if field == "right" else "right"
|
||||
# Rotate the node up
|
||||
setattr(parent, field, getattr(node, other_field))
|
||||
setattr(node, other_field, parent)
|
||||
```
|
File diff suppressed because it is too large
Load diff
|
@ -1,146 +0,0 @@
|
|||
---
|
||||
title: "Treap, revisited"
|
||||
date: 2024-07-27T14:12:27+01:00
|
||||
draft: false # I don't care for draft mode, git has branches for that
|
||||
description: "An even simpler BST"
|
||||
tags:
|
||||
- algorithms
|
||||
- data structures
|
||||
- python
|
||||
categories:
|
||||
- programming
|
||||
series:
|
||||
- Cool algorithms
|
||||
favorite: false
|
||||
disable_feed: false
|
||||
---
|
||||
|
||||
My [last post]({{< relref "../2024-07-20-treap/index.md" >}}) about the _Treap_
|
||||
showed an implementation using tree rotations, as is commonly done with [AVL
|
||||
Trees][avl] and [Red Black Trees][rb].
|
||||
|
||||
But the _Treap_ lends itself well to a simple and elegant implementation with no
|
||||
tree rotations. This makes it especially easy to implement the removal of a key,
|
||||
rather than the fiddly process of deletion using tree rotations.
|
||||
|
||||
[avl]: https://en.wikipedia.org/wiki/AVL_tree
|
||||
[rb]: https://en.wikipedia.org/wiki/Red%E2%80%93black_tree
|
||||
|
||||
<!--more-->
|
||||
|
||||
## Implementation
|
||||
|
||||
All operations on the tree will be implemented in terms of two fundamental
|
||||
operations: `split` and `merge`.
|
||||
|
||||
We'll be reusing the same structures as in the last post, so let's skip straight
|
||||
to implementing those fundamentals, and building on them for `insert` and
|
||||
`delete`.
|
||||
|
||||
### Split
|
||||
|
||||
Splitting a tree means taking a key, and getting the following output:
|
||||
|
||||
* a `left` node, root of the tree of all keys lower than the input.
|
||||
* an extracted `node` which corresponds to the input `key`.
|
||||
* a `right` node, root of the tree of all keys higher than the input.
|
||||
|
||||
```python
|
||||
type OptionalNode[K, V] = Node[K, V] | None
|
||||
|
||||
class SplitResult(NamedTuple):
|
||||
left: OptionalNode
|
||||
node: OptionalNode
|
||||
right: OptionalNode
|
||||
|
||||
def split(root: OptionalNode[K, V], key: K) -> SplitResult:
|
||||
# Base case, empty tree
|
||||
if root is None:
|
||||
return SplitResult(None, None, None)
|
||||
# If we found the key, simply extract left and right
|
||||
if root.key == key:
|
||||
left, right = root.left, root.right
|
||||
root.left, root.right = None, None
|
||||
return SplitResult(left, root, right)
|
||||
# Otherwise, recurse on the corresponding side of the tree
|
||||
if root.key < key:
|
||||
left, node, right = split(root.right, key)
|
||||
root.right = left
|
||||
return SplitResult(root, node, right)
|
||||
if key < root.key:
|
||||
left, node, right = split(root.left, key)
|
||||
root.left = right
|
||||
return SplitResult(left, node, root)
|
||||
raise RuntimeError("Unreachable")
|
||||
```
|
||||
|
||||
### Merge
|
||||
|
||||
Merging a `left` and `right` tree means (cheaply) building a new tree containing
|
||||
both of them. A pre-condition for merging is that the `left` tree is composed
|
||||
entirely of nodes that are lower than any key in `right` (i.e: as in `left` and
|
||||
`right` after a `split`).
|
||||
|
||||
```python
|
||||
def merge(
|
||||
left: OptionalNode[K, V],
|
||||
right: OptionalNode[K, V],
|
||||
) -> OptionalNode[K, V]:
|
||||
# Base cases, left or right being empty
|
||||
if left is None:
|
||||
return right
|
||||
if right is None:
|
||||
return left
|
||||
# Left has higher priority, it must become the root node
|
||||
if left.priority >= right.priority:
|
||||
# We recursively reconstruct its right sub-tree
|
||||
left.right = merge(left.right, right)
|
||||
return left
|
||||
# Right has higher priority, it must become the root node
|
||||
if left.priority < right.priority:
|
||||
# We recursively reconstruct its left sub-tree
|
||||
right.left = merge(left, right.left)
|
||||
return right
|
||||
raise RuntimeError("Unreachable")
|
||||
```
|
||||
|
||||
### Insertion
|
||||
|
||||
Inserting a node into the tree is done in two steps:
|
||||
|
||||
1. `split` the tree to isolate the middle insertion point
|
||||
2. `merge` it back up to form a full tree with the inserted key
|
||||
|
||||
```python
|
||||
def insert(self, key: K, value: V) -> bool:
    """Map `key` to `value`.

    Returns `True` if the key was already in the tree (its value is
    overwritten), `False` if a new node was created.
    """
    # `left` and `right` come before/after the key
    left, node, right = split(self._root, key)
    was_updated = node is not None
    if was_updated:
        # The key was already in the tree: update its node in place
        node.value = value
    else:
        node = Node(key, value)
    # Rebuild the tree with a couple of merge operations
    self._root = merge(left, merge(node, right))
    # Signal whether the key was already in the tree
    return was_updated
|
||||
```
|
||||
|
||||
### Removal
|
||||
|
||||
Removing a key from the tree is similar to inserting a new key, and forgetting
|
||||
to insert it back: simply `split` the tree and `merge` it back without the
|
||||
extracted middle node.
|
||||
|
||||
```python
|
||||
def remove(self, key: K) -> bool:
    """Remove `key` from the tree, returning whether it was mapped."""
    # `parts.node` holds the key, or is `None` if the key wasn't in the tree
    parts = split(self._root, key)
    # Put the tree back together, without the extracted node
    self._root = merge(parts.left, parts.right)
    # Signal whether `key` was mapped in the tree
    return parts.node is not None
|
||||
```
|
|
@ -1,145 +0,0 @@
|
|||
---
|
||||
title: "Reservoir Sampling"
|
||||
date: 2024-08-02T18:30:56+01:00
|
||||
draft: false # I don't care for draft mode, git has branches for that
|
||||
description: "Elegantly sampling a stream"
|
||||
tags:
|
||||
- algorithms
|
||||
- python
|
||||
categories:
|
||||
- programming
|
||||
series:
|
||||
- Cool algorithms
|
||||
favorite: false
|
||||
disable_feed: false
|
||||
mathjax: true
|
||||
---
|
||||
|
||||
[_Reservoir Sampling_][reservoir] is an [online][online], probabilistic
|
||||
algorithm to uniformly sample $k$ random elements out of a stream of values.
|
||||
|
||||
It's a particularly elegant and small algorithm, only requiring $\Theta(k)$
|
||||
amount of space and a single pass through the stream.
|
||||
|
||||
[reservoir]: https://en.wikipedia.org/wiki/Reservoir_sampling
|
||||
[online]: https://en.wikipedia.org/wiki/Online_algorithm
|
||||
|
||||
<!--more-->
|
||||
|
||||
## Sampling one element
|
||||
|
||||
As an introduction, we'll first focus on fairly sampling one element from the
|
||||
stream.
|
||||
|
||||
```python
|
||||
def sample_one[T](stream: Iterable[T]) -> T:
|
||||
stream_iter = iter(stream)
|
||||
# Sample the first element
|
||||
res = next(stream_iter)
|
||||
for i, val in enumerate(stream_iter, start=1):
|
||||
j = random.randint(0, i)
|
||||
# Replace the sampled element with probability 1/(i + 1)
|
||||
if j == 0:
|
||||
res = val
|
||||
# Return the randomly sampled element
|
||||
return res
|
||||
```
|
||||
|
||||
### Proof
|
||||
|
||||
Let's now prove that this algorithm leads to a fair sampling of the stream.
|
||||
|
||||
We'll be doing proof by induction.
|
||||
|
||||
#### Hypothesis $H_N$
|
||||
|
||||
After iterating through the first $N$ items in the stream,
|
||||
each of them has had an equal $\frac{1}{N}$ probability of being selected as
|
||||
`res`.
|
||||
|
||||
#### Base Case $H_1$
|
||||
|
||||
We can trivially observe that the first element is always assigned to `res`,
|
||||
$\frac{1}{1} = 1$, the hypothesis has been verified.
|
||||
|
||||
#### Inductive Case
|
||||
|
||||
For a given $N$, let us assume that $H_N$ holds. Let us now look at the events
|
||||
of the loop iteration where `i = N` (i.e: observation of the $N + 1$-th item in the
|
||||
stream).
|
||||
|
||||
`j = random.randint(0, i)` uniformly selects a value in the range $[0, i]$,
|
||||
a.k.a $[0, N]$. We then have two cases:
|
||||
|
||||
* `j == 0`, with probability $\frac{1}{N + 1}$: we select `val` as the new
|
||||
reservoir element `res`.
|
||||
|
||||
* `j != 0`, with probability $\frac{N}{N + 1}$: we keep the previous value of
|
||||
`res`. By $H_N$, any of the first $N$ elements had a $\frac{1}{N}$ probability
|
||||
of being `res` at the start of the loop, so each element now has a
|
||||
probability $\frac{1}{N} \cdot \frac{N}{N + 1} = \frac{1}{N + 1}$ of being the
|
||||
element.
|
||||
|
||||
And thus, we have proven $H_{N + 1}$ at the end of the loop.
|
||||
|
||||
## Sampling $k$ elements
|
||||
|
||||
The code for sampling $k$ elements is very similar to the one-element case.
|
||||
|
||||
```python
|
||||
def sample[T](stream: Iterable[T], k: int = 1) -> list[T]:
|
||||
stream_iter = iter(stream)
|
||||
# Retain the first 'k' elements in the reservoir
|
||||
res = list(itertools.islice(stream_iter, k))
|
||||
for i, val in enumerate(stream_iter, start=k):
|
||||
j = random.randint(0, i)
|
||||
# Replace one element at random with probability k/(i + 1)
|
||||
if j < k:
|
||||
res[j] = val
|
||||
# Return 'k' randomly sampled elements
|
||||
return res
|
||||
```
|
||||
|
||||
### Proof
|
||||
|
||||
Let us once again do a proof by induction, assuming the stream contains at least
|
||||
$k$ items.
|
||||
|
||||
#### Hypothesis $H_N$
|
||||
|
||||
After iterating through the first $N$ items in the stream, each of them has had
|
||||
an equal $\frac{k}{N}$ probability of being sampled from the stream.
|
||||
|
||||
#### Base Case $H_k$
|
||||
|
||||
We can trivially observe that the first $k$ elements are sampled at the start of
|
||||
the algorithm, $\frac{k}{k} = 1$, the hypothesis has been verified.
|
||||
|
||||
#### Inductive Case
|
||||
|
||||
For a given $N$, let us assume that $H_N$ holds. Let us now look at the events
|
||||
of the loop iteration where `i = N`, in order to prove $H_{N + 1}$.
|
||||
|
||||
`j = random.randint(0, i)` uniformly selects a value in the range $[0, i]$,
|
||||
a.k.a $[0, N]$. We then have three cases:
|
||||
|
||||
* `j >= k`, with probability $1 - \frac{k}{N + 1}$: we do not modify the
|
||||
sampled reservoir at all.
|
||||
|
||||
* `j < k`, with probability $\frac{k}{N + 1}$: we sample the new element to
|
||||
replace the `j`-th element of the reservoir. Therefore for any element
|
||||
$e \in [0, k[$ we can either have:
|
||||
* $j = e$: the element _is_ replaced, probability $\frac{1}{k}$.
|
||||
* $j \neq e$: the element is _not_ replaced, probability $\frac{k - 1}{k}$.
|
||||
|
||||
We can now compute the probability that a previously sampled element is kept in
|
||||
the reservoir:
|
||||
$1 - \frac{k}{N + 1} + \frac{k}{N + 1} \cdot \frac{k - 1}{k} = \frac{N}{N + 1}$.
|
||||
|
||||
By $H_N$, any of the first $N$ elements had a $\frac{k}{N}$ probability
|
||||
of being sampled at the start of the loop, so each element now has a
|
||||
probability $\frac{k}{N} \cdot \frac{N}{N + 1} = \frac{k}{N + 1}$ of being
|
||||
sampled.
|
||||
|
||||
We have now proven that all elements have a probability $\frac{k}{N + 1}$ of
|
||||
being sampled at the end of the loop, therefore $H_{N + 1}$ has been verified.
|
|
@ -1,472 +0,0 @@
|
|||
---
|
||||
title: "k-d Tree"
|
||||
date: 2024-08-10T11:50:33+01:00
|
||||
draft: false # I don't care for draft mode, git has branches for that
|
||||
description: "Points in spaaaaace!"
|
||||
tags:
|
||||
- algorithms
|
||||
- data structures
|
||||
- python
|
||||
categories:
|
||||
- programming
|
||||
series:
|
||||
- Cool algorithms
|
||||
favorite: false
|
||||
disable_feed: false
|
||||
---
|
||||
|
||||
The [_k-d Tree_][wiki] is a useful way to map points in space and make them
|
||||
efficient to query.
|
||||
|
||||
I ran into them during my studies in graphics, as they are one of the
|
||||
possible acceleration structures for [ray-casting] operations.
|
||||
|
||||
[wiki]: https://en.wikipedia.org/wiki/K-d_tree
|
||||
[ray-casting]: https://en.wikipedia.org/wiki/Ray_casting
|
||||
|
||||
<!--more-->
|
||||
|
||||
## Implementation
|
||||
|
||||
As usual, this will be in Python, though its lack of proper discriminated enums
|
||||
makes it more verbose than would otherwise be necessary.
|
||||
|
||||
### Pre-requisites
|
||||
|
||||
Let's first define what kind of space our _k-d Tree_ is dealing with. In this
|
||||
instance $k = 3$ just like in the normal world.
|
||||
|
||||
```python
|
||||
class Point(NamedTuple):
|
||||
x: float
|
||||
y: float
|
||||
z: float
|
||||
|
||||
class Axis(IntEnum):
|
||||
X = 0
|
||||
Y = 1
|
||||
Z = 2
|
||||
|
||||
def next(self) -> Axis:
|
||||
# Each level of the tree is split along a different axis
|
||||
return Axis((self + 1) % 3)
|
||||
```
|
||||
|
||||
### Representation
|
||||
|
||||
The tree is represented by `KdTree`, each of its leaf nodes is a `KdLeafNode`
|
||||
and its inner nodes are `KdSplitNode`s.
|
||||
|
||||
For each point in space, the tree can also keep track of an associated value,
|
||||
similar to a dictionary or other mapping data structure. Hence we will make our
|
||||
`KdTree` generic to this mapped type `T`.
|
||||
|
||||
#### Leaf node
|
||||
|
||||
A leaf node contains a number of points that were added to the tree. For each
|
||||
point, we also track their mapped value, hence the `dict[Point, T]`.
|
||||
|
||||
```python
|
||||
class KdLeafNode[T]:
|
||||
points: dict[Point, T]
|
||||
|
||||
def __init__(self):
|
||||
self.points = {}
|
||||
```
|
||||
|
||||
#### Split node
|
||||
|
||||
An inner node must partition the space into two sub-spaces along a given axis
|
||||
and mid-point (thus defining a plane). All points that are "to the left" of the
|
||||
plane will be kept in one child, while all the points "to the right" will be in
|
||||
the other. Similar to a [_Binary Search Tree_][bst]'s inner nodes.
|
||||
|
||||
[bst]: https://en.wikipedia.org/wiki/Binary_search_tree
|
||||
|
||||
```python
|
||||
class KdSplitNode[T]:
|
||||
axis: Axis
|
||||
mid: float
|
||||
children: tuple[KdTreeNode[T], KdTreeNode[T]]
|
||||
|
||||
# Convenience function to index into the child which contains `point`
|
||||
def _index(self, point: Point) -> int:
|
||||
return 0 if point[self.axis] <= self.mid else 1
|
||||
```
|
||||
|
||||
#### Tree
|
||||
|
||||
The tree itself is merely a wrapper around its inner nodes.
|
||||
|
||||
One annoying issue about writing this in Python is the lack of proper
|
||||
discriminated enum types. So we need to create a wrapper type for the nodes
|
||||
(`KdNode`) to allow for splitting when updating the tree.
|
||||
|
||||
```python
|
||||
class KdNode[T]:
|
||||
# Wrapper around leaf/inner nodes, the poor man's discriminated enum
|
||||
inner: KdLeafNode[T] | KdSplitNode[T]
|
||||
|
||||
def __init__(self):
|
||||
self.inner = KdLeafNode()
|
||||
|
||||
# Convenience constructor used when splitting a node
|
||||
@classmethod
|
||||
def from_items(cls, items: Iterable[tuple[Point, T]]) -> KdNode[T]:
|
||||
res = cls()
|
||||
res.inner.points.update(items)
|
||||
return res
|
||||
|
||||
class KdTree[T]:
|
||||
_root: KdNode[T]
|
||||
|
||||
def __init__(self):
|
||||
# Tree starts out empty
|
||||
self._root = KdNode()
|
||||
```
|
||||
|
||||
### Inserting a point
|
||||
|
||||
To add a point to the tree, we simply recurse from node to node, similar to a
|
||||
_BST_'s insertion algorithm. Once we've found the correct leaf node to insert
|
||||
our point into, we simply do so.
|
||||
|
||||
If that leaf node goes over the maximum number of points it can store, we must
|
||||
then split it along an axis, cycling between `X`, `Y`, and `Z` at each level of
|
||||
the tree (i.e: splitting along the `X` axis on the first level, then `Y` on the
|
||||
second, then `Z` after that, and then `X`, etc...).
|
||||
|
||||
```python
|
||||
# How many points should be stored in a leaf node before being split
|
||||
MAX_CAPACITY = 32
|
||||
|
||||
def median(values: Iterable[float]) -> float:
    """Return the median of `values`.

    The input is fully consumed and sorted. For an odd number of values the
    central one is returned; for an even number, the mean of the two central
    values is returned.

    Raises `IndexError` if `values` is empty.
    """
    sorted_values = sorted(values)
    mid_point = len(sorted_values) // 2
    if len(sorted_values) % 2 == 1:
        return sorted_values[mid_point]
    # Even length: the two central values sit at `mid_point - 1` and
    # `mid_point` (0-indexed), not `mid_point` and `mid_point + 1`
    a, b = sorted_values[mid_point - 1], sorted_values[mid_point]
    # Written as `a + (b - a) / 2` rather than `(a + b) / 2` to avoid
    # overflowing on very large magnitudes
    return a + (b - a) / 2
|
||||
|
||||
def partition[T](
|
||||
pred: Callable[[T], bool],
|
||||
iterable: Iterable[T]
|
||||
) -> tuple[list[T], list[T]]:
|
||||
truths, falses = [], []
|
||||
for v in iterable:
|
||||
(truths if pred(v) else falses).append(v)
|
||||
return truths, falses
|
||||
|
||||
def split_leaf[T](node: KdLeafNode[T], axis: Axis) -> KdSplitNode[T]:
|
||||
# Find the median value for the given axis
|
||||
mid = median(p[axis] for p in node.points)
|
||||
# Split into left/right children according to the mid-point and axis
|
||||
left, right = partition(lambda kv: kv[0][axis] <= mid, node.points.items())
|
||||
return KdSplitNode(
|
||||
split_axis,
|
||||
mid,
|
||||
(KdNode.from_items(left), KdNode.from_items(right)),
|
||||
)
|
||||
|
||||
class KdTree[T]:
|
||||
def insert(self, point: Point, val: T) -> bool:
|
||||
# Forward to the root node, choose `X` as the first split axis
|
||||
return self._root.insert(point, val, Axis.X)
|
||||
|
||||
class KdLeafNode[T]:
|
||||
def insert(self, point: Point, val: T, split_axis: Axis) -> bool:
|
||||
# Check whether we're overwriting a previous value
|
||||
was_mapped = point in self.points
|
||||
# Store the corresponding value
|
||||
self.points[point] = val
|
||||
# Return whether we've performed an overwrite
|
||||
return was_mapped
|
||||
|
||||
class KdSplitNode[T]:
|
||||
def insert(self, point: Point, val: T, split_axis: Axis) -> bool:
|
||||
# Find the child which contains the point
|
||||
child = self.children[self._index(point)]
|
||||
# Recurse into it, choosing the next split axis
|
||||
return child.insert(point, val, split_axis.next())
|
||||
|
||||
class KdNode[T]:
|
||||
def insert(self, point: Point, val: T, split_axis: Axis) -> bool:
|
||||
# Add the point to the wrapped node...
|
||||
res = self.inner.insert(point, val, split_axis)
|
||||
# ... And take care of splitting leaf nodes when necessary
|
||||
if (
|
||||
isinstance(self.inner, KdLeafNode)
|
||||
and len(self.inner.points) > MAX_CAPACITY
|
||||
):
|
||||
self.inner = split_leaf(self.inner, split_axis)
|
||||
return res
|
||||
```
|
||||
|
||||
### Searching for a point
|
||||
|
||||
Looking for a given point in the tree looks very similar to a _BST_'s search,
|
||||
each split node dividing the space into two sub-spaces, only one of which
|
||||
contains the point.
|
||||
|
||||
```python
|
||||
class KdTree[T]:
|
||||
def lookup(self, point: Point) -> T | None:
|
||||
# Forward to the root node
|
||||
return self._root.lookup(point)
|
||||
|
||||
class KdNode[T]:
|
||||
def lookup(self, point: Point) -> T | None:
|
||||
# Forward to the wrapped node
|
||||
return self.inner.lookup(point)
|
||||
|
||||
class KdLeafNode[T]:
|
||||
def lookup(self, point: Point) -> T | None:
|
||||
# Simply check whether we've stored the point in this leaf
|
||||
return self.points.get(point)
|
||||
|
||||
class KdSplitNode[T]:
|
||||
def lookup(self, point: Point) -> T | None:
|
||||
# Recurse into the child which contains the point
|
||||
return self.children[self._index(point)].lookup(point)
|
||||
```
|
||||
|
||||
### Closest points
|
||||
|
||||
Now to look at the most interesting operation one can do on a _k-d Tree_:
|
||||
querying for the objects which are closest to a given point (i.e: the [Nearest
|
||||
neighbour search][nns]).
|
||||
|
||||
This is a more complicated algorithm, which will also need some modifications to
|
||||
the current _k-d Tree_ implementation in order to track just a bit more information
|
||||
about the points it contains.
|
||||
|
||||
[nns]: https://en.wikipedia.org/wiki/Nearest_neighbor_search
|
||||
|
||||
#### A notion of distance
|
||||
|
||||
To search for the closest points to a given origin, we first need to define
|
||||
which [distance](https://en.wikipedia.org/wiki/Distance) we are using in our
|
||||
space.
|
||||
|
||||
For this example, we'll simply be using the usual definition of [(Euclidean)
|
||||
distance][euclidean-distance].
|
||||
|
||||
[euclidean-distance]: https://en.wikipedia.org/wiki/Euclidean_distance
|
||||
|
||||
```python
|
||||
def dist(point: Point, other: Point) -> float:
    """Return the Euclidean distance between `point` and `other`."""
    # Pair up coordinates axis by axis; the first operand is `point`
    # (this is a free function, there is no `self`)
    return sqrt(sum((a - b) ** 2 for a, b in zip(point, other)))
|
||||
```
|
||||
|
||||
#### Tracking the tree's boundaries
|
||||
|
||||
To make the query efficient, we'll need to track the tree's boundaries: the
|
||||
bounding box of all points contained therein. This will allow us to stop the
|
||||
search early once we've found enough points and can be sure that the rest of the
|
||||
tree is too far away to qualify.
|
||||
|
||||
For this, let's define the `AABB` (Axis-Aligned Bounding Box) class.
|
||||
|
||||
```python
|
||||
class Point(NamedTuple):
    # Convenience function to replace the coordinate along a given dimension
    def replace(self, axis: Axis, new_coord: float) -> Point:
        """Return a copy of this point with its `axis` coordinate set to `new_coord`."""
        coords = list(self)
        coords[axis] = new_coord
        # `Point` takes one positional argument per coordinate, so the list
        # must be unpacked rather than passed as a single argument
        return Point(*coords)
|
||||
|
||||
class AABB(NamedTuple):
    """Axis-Aligned Bounding Box, described by its two extreme corners."""

    # Lowest coordinates in the box
    low: Point
    # Highest coordinates in the box
    high: Point

    # An empty box
    @classmethod
    def empty(cls) -> AABB:
        # Inverted infinite bounds: `low` is +inf and `high` is -inf on every
        # axis, so extending the box with any point yields exactly that point
        return cls(
            Point(*(float("inf"),) * 3),
            Point(*(float("-inf"),) * 3),
        )

    # Split the box into two along a given axis for a given mid-point
    def split(self, axis: Axis, mid: float) -> tuple[AABB, AABB]:
        # The splitting plane must lie within the box
        assert self.low[axis] <= mid <= self.high[axis]
        return (
            # Lower half: same `low`, `high` clamped to the plane
            AABB(self.low, self.high.replace(axis, mid)),
            # Upper half: `low` raised to the plane, same `high`
            AABB(self.low.replace(axis, mid), self.high),
        )

    # Extend a box to contain a given point
    def extend(self, point: Point) -> AABB:
        # The box is immutable: compute the new corners coordinate by
        # coordinate and return a fresh box
        low = Point(*map(min, zip(self.low, point)))
        high = Point(*map(max, zip(self.high, point)))
        return AABB(low, high)

    # Return the shortest distance between a given point and the box
    def dist_to_point(self, point: Point) -> float:
        # Per axis: how far the point lies outside of the [low, high] range,
        # or 0 when it is within the range
        deltas = (
            max(self.low[axis] - point[axis], 0, point[axis] - self.high[axis])
            for axis in Axis
        )
        return dist(Point(0, 0, 0), Point(*deltas))
|
||||
```
|
||||
|
||||
And do the necessary modifications to the `KdTree` to store the bounding box and
|
||||
update it as we add new points.
|
||||
|
||||
```python
|
||||
class KdTree[T]:
|
||||
_root: KdNode[T]
|
||||
# New field: to keep track of the tree's boundaries
|
||||
_aabb: AABB
|
||||
|
||||
def __init__(self):
|
||||
self._root = KdNode()
|
||||
# Initialize the empty tree with an empty bounding box
|
||||
self._aabb = AABB.empty()
|
||||
|
||||
def insert(self, point: Point, val: T) -> bool:
|
||||
# Extend the AABB for our k-d Tree when adding a point to it
|
||||
self._aabb = self._aabb.extend(point)
|
||||
return self._root.insert(point, val, Axis.X)
|
||||
```
|
||||
|
||||
#### `MaxHeap`
|
||||
|
||||
Python's builtin [`heapq`][heapq] module provides the necessary functions to
|
||||
create and interact with a [_Priority Queue_][priority-queue], in the form of a
|
||||
[_Binary Heap_][binary-heap].
|
||||
|
||||
Unfortunately, Python's library maintains a _min-heap_, which keeps the minimum
|
||||
element at the root. For this algorithm, we're interested in having a
|
||||
_max-heap_, with the maximum at the root.
|
||||
|
||||
Thankfully, one can just reverse the comparison function for each element to
|
||||
convert between the two. Let's write a `MaxHeap` class making use of this
|
||||
library, with a `Reverse` wrapper class to reverse the order of elements
|
||||
contained within it (similar to [Rust's `Reverse`][reverse]).
|
||||
|
||||
[binary-heap]: https://en.wikipedia.org/wiki/Binary_heap
|
||||
[heapq]: https://docs.python.org/3/library/heapq.html
|
||||
[priority-queue]: https://en.wikipedia.org/wiki/Priority_queue
|
||||
[reverse]: https://doc.rust-lang.org/std/cmp/struct.Reverse.html
|
||||
|
||||
```python
|
||||
# Reverses the wrapped value's ordering
|
||||
@functools.total_ordering
|
||||
class Reverse[T]:
|
||||
value: T
|
||||
|
||||
def __init__(self, value: T):
|
||||
self.value = value
|
||||
|
||||
def __lt__(self, other: Reverse[T]) -> bool:
|
||||
return self.value > other.value
|
||||
|
||||
def __eq__(self, other: Reverse[T]) -> bool:
|
||||
return self.value == other.value
|
||||
|
||||
class MaxHeap[T]:
|
||||
_heap: list[Reverse[T]]
|
||||
|
||||
def __init__(self):
|
||||
self._heap = []
|
||||
|
||||
def __len__(self) -> int:
|
||||
return len(self._heap)
|
||||
|
||||
def __iter__(self) -> Iterator[T]:
|
||||
yield from (item.value for item in self._heap)
|
||||
|
||||
# Push a value on the heap
|
||||
def push(self, value: T) -> None:
|
||||
heapq.heappush(self._heap, Reverse(value))
|
||||
|
||||
# Peek at the current maximum value
|
||||
def peek(self) -> T:
|
||||
return self._heap[0].value
|
||||
|
||||
# Pop and return the highest value
|
||||
def pop(self) -> T:
|
||||
return heapq.heappop(self._heap).value
|
||||
|
||||
# Pushes a value onto the heap, pops and returns the highest value
|
||||
def pushpop(self, value: T) -> None:
|
||||
return heapq.heappushpop(self._heap, Reverse(value)).value
|
||||
```
|
||||
|
||||
#### The actual Implementation
|
||||
|
||||
Now that we have written the necessary building blocks, let's tackle the
|
||||
implementation of `closest` for our _k-d Tree_.
|
||||
|
||||
```python
|
||||
# Wrapper type for closest points, ordered by `distance`
|
||||
@dataclasses.dataclass(order=True)
|
||||
class ClosestPoint[T](NamedTuple):
|
||||
point: Point = field(compare=False)
|
||||
value: T = field(compare=False)
|
||||
distance: float
|
||||
|
||||
class KdTree[T]:
|
||||
def closest(self, point: Point, n: int = 1) -> list[ClosestPoint[T]]:
|
||||
assert n > 0
|
||||
# Create the output heap
|
||||
res = MaxHeap()
|
||||
# Recurse onto the root node
|
||||
self._root.closest(point, res, n, self._aabb)
|
||||
# Return the resulting list, from closest to farthest
|
||||
return sorted(res)
|
||||
|
||||
class KdNode[T]:
|
||||
def closest(
|
||||
self,
|
||||
point: Point,
|
||||
out: MaxHeap[ClosestPoint[T]],
|
||||
n: int,
|
||||
bounds: AABB,
|
||||
) -> None:
|
||||
# Forward to the wrapped node
|
||||
self.inner.closest(point, out, n, bounds)
|
||||
|
||||
class KdLeafNode[T]:
|
||||
def closest(
|
||||
self,
|
||||
point: Point,
|
||||
out: MaxHeap[ClosestPoint[T]],
|
||||
n: int,
|
||||
bounds: AABB,
|
||||
) -> None:
|
||||
# At the leaf, simply iterate over all points and add them to the heap
|
||||
for p, val in self.points.items():
|
||||
item = ClosestPoint(p, val, dist(p, point))
|
||||
if len(out) < n:
|
||||
# If the heap isn't full, just push
|
||||
out.push(item)
|
||||
elif out.peek().distance > item.distance:
|
||||
# Otherwise, push and pop to keep the heap at `n` elements
|
||||
out.pushpop(item)
|
||||
|
||||
class KdSplitNode[T]:
|
||||
def closest(
|
||||
self,
|
||||
point: Point,
|
||||
out: list[ClosestPoint[T]],
|
||||
n: int,
|
||||
bounds: AABB,
|
||||
) -> None:
|
||||
index = self._index(point)
|
||||
children_bounds = bounds.split(self.axis, self.mid)
|
||||
# Iterate over the child which contains the point, then its neighbour
|
||||
for i in (index, 1 - index):
|
||||
child, bounds = self.children[i], children_bounds[i]
|
||||
# `min_dist` is 0 for the first child, and the minimum distance of
|
||||
# all points contained in the second child
|
||||
min_dist = bounds.dist_to_point(point)
|
||||
# If the heap is at capacity and the child to inspect too far, stop
|
||||
if len(out) == n and min_dist > out.peek().distance:
|
||||
return
|
||||
# Otherwise, recurse
|
||||
child.closest(point, out, n, bounds)
|
||||
```
|
|
@ -1,112 +0,0 @@
|
|||
---
|
||||
title: "Kd Tree Revisited"
|
||||
date: 2024-08-17T14:20:22+01:00
|
||||
draft: false # I don't care for draft mode, git has branches for that
|
||||
description: "Simplifying the nearest neighbour search"
|
||||
tags:
|
||||
- algorithms
|
||||
- data structures
|
||||
- python
|
||||
categories:
|
||||
- programming
|
||||
series:
|
||||
- Cool algorithms
|
||||
favorite: false
|
||||
disable_feed: false
|
||||
---
|
||||
|
||||
After giving it a bit of thought, I've found a way to simplify the nearest
|
||||
neighbour search (i.e: the `closest` method) for the `KdTree` I implemented in
|
||||
[my previous post]({{< relref "../2024-08-10-kd-tree/index.md" >}}).
|
||||
|
||||
<!--more-->
|
||||
|
||||
## The improvement
|
||||
|
||||
That post implemented the nearest neighbour search by keeping track of the
|
||||
tree's boundaries (through `AABB`), and each of its sub-trees (through
|
||||
`AABB.split`), and testing for the early exit condition by computing the
|
||||
distance of the search's origin to each sub-tree's boundaries.
|
||||
|
||||
Instead of _explicitly_ keeping track of each sub-tree's boundaries, we can
|
||||
implicitly compute it when recursing down the tree.
|
||||
|
||||
To check for the distance between the queried point and the splitting plane of
|
||||
inner nodes: we simply need to project the origin onto that plane, thus giving
|
||||
us a minimal bound on the distance of the points stored on the other side.
|
||||
|
||||
This can be easily computed from the `axis` and `mid` values which are stored in
|
||||
the inner nodes: to project the point onto the plane we simply replace its
|
||||
coordinate for this axis by `mid`.
|
||||
|
||||
## Simplified search
|
||||
|
||||
With that out of the way, let's now see how `closest` can be implemented without
|
||||
needing to track the tree's `AABB` at the root:
|
||||
|
||||
```python
|
||||
# Wrapper type for closest points, ordered by `distance`
|
||||
@dataclasses.dataclass(order=True)
|
||||
class ClosestPoint[T](NamedTuple):
|
||||
point: Point = field(compare=False)
|
||||
value: T = field(compare=False)
|
||||
distance: float
|
||||
|
||||
class KdTree[T]:
|
||||
def closest(self, point: Point, n: int = 1) -> list[ClosestPoint[T]]:
|
||||
assert n > 0
|
||||
res = MaxHeap()
|
||||
# Instead of passing an `AABB`, we give an initial projection point,
|
||||
# the query origin itself (since we haven't visited any split node yet)
|
||||
self._root.closest(point, res, n, point)
|
||||
return sorted(res)
|
||||
|
||||
class KdNode[T]:
|
||||
def closest(
|
||||
self,
|
||||
point: Point,
|
||||
out: MaxHeap[ClosestPoint[T]],
|
||||
n: int,
|
||||
projection: Point,
|
||||
) -> None:
|
||||
# Same implementation
|
||||
self.inner.closest(point, out, n, bounds)
|
||||
|
||||
class KdLeafNode[T]:
|
||||
def closest(
|
||||
self,
|
||||
point: Point,
|
||||
out: MaxHeap[ClosestPoint[T]],
|
||||
n: int,
|
||||
projection: Point,
|
||||
) -> None:
|
||||
# Same implementation
|
||||
for p, val in self.points.items():
|
||||
item = ClosestPoint(p, val, dist(p, point))
|
||||
if len(out) < n:
|
||||
out.push(item)
|
||||
elif out.peek().distance > item.distance:
|
||||
out.pushpop(item)
|
||||
|
||||
class KdSplitNode[T]:
|
||||
def closest(
|
||||
self,
|
||||
point: Point,
|
||||
out: list[ClosestPoint[T]],
|
||||
n: int,
|
||||
projection: Point,
|
||||
) -> None:
|
||||
index = self._index(point)
|
||||
self.children[index].closest(point, out, n, projection)
|
||||
# Project onto the splitting plane, for a minimum distance to its points
|
||||
projection = projection.replace(self.axis, self.mid)
|
||||
# If we're at capacity and can't possibly find any closer points, exit
|
||||
if len(out) == n and dist(point, projection) > out.peek().distance:
|
||||
return
|
||||
# Otherwise recurse on the other side to check for nearer neighbours
|
||||
self.children[1 - index].closest(point, out, n, projection)
|
||||
```
|
||||
|
||||
As you can see, the main difference is in `KdSplitNode`'s implementation, where
|
||||
we can quickly compute the minimum distance between the search's origin and all
|
||||
potential points in that subspace.
|
|
@ -16,7 +16,7 @@ favorite: false
|
|||
The flyweight is a well-known
|
||||
[GoF](https://en.wikipedia.org/wiki/Design_Patterns) design pattern.
|
||||
|
||||
Its intent is to minimize memory usage by reducing the number of instantiations
|
||||
It's intent is to minimize memory usage by reducing the number of instantiations
|
||||
of a given object.
|
||||
|
||||
I will show you how to implement a robust flyweight in C++, as well as a way to
|
|
@ -2,12 +2,14 @@
|
|||
title: "Git Basics"
|
||||
date: 2020-12-07 18:54:31+0100
|
||||
draft: false # I don't care for draft mode, git has branches for that
|
||||
description: "The next step after the basics"
|
||||
description: ""
|
||||
tags:
|
||||
- git
|
||||
- cli
|
||||
categories:
|
||||
- programming
|
||||
series:
|
||||
- Git basics
|
||||
favorite: false
|
||||
---
|
||||
|
||||
|
@ -134,7 +136,7 @@ branch.
|
|||
#### Fixup, a practical example
|
||||
|
||||
A specific kind of squashing which I use frequently is the notion of `fixup`s.
|
||||
Say you've committed a change (*A*), and later on notice that it is missing
|
||||
Say you've commited a change (*A*), and later on notice that it is missing
|
||||
a part of the changeset. You can decide to commit that missing part (*A-bis*)
|
||||
and annotate it to mean that it is linked to *A*.
|
||||
|
||||
|
@ -184,7 +186,7 @@ After applying the rebase, you find yourself with the complete change inside
|
|||
|
||||
This is especially useful when you want to apply suggestion on a merge request
|
||||
after it was reviewed. You can keep a clean history without those pesky `Apply
|
||||
suggestion ...` commits being part of your history.
|
||||
suggestion ...` commmits being part of your history.
|
||||
|
||||
### Lost commits and the reflog
|
||||
|
||||
|
@ -315,7 +317,7 @@ easily choose which parts of your changes should end up in the same commit.
|
|||
Here's a list of commands that you should read-up on, but I won't be presenting
|
||||
further:
|
||||
|
||||
* `git bisect`
|
||||
* `git bissect`
|
||||
* `git rerere`
|
||||
* `git stash`
|
||||
* and more...
|
|
@ -8,8 +8,6 @@ tags:
|
|||
categories:
|
||||
favorite: false
|
||||
tikz: true
|
||||
graphviz: true
|
||||
mermaid: true
|
||||
---
|
||||
|
||||
## Test post please ignore
|
||||
|
@ -42,29 +40,6 @@ echo hello world | cut -d' ' -f 1
|
|||
\end{tikzpicture}
|
||||
{{% /tikz %}}
|
||||
|
||||
### Graphviz support
|
||||
|
||||
{{% graphviz %}}
|
||||
graph {
|
||||
a -- b
|
||||
b -- c
|
||||
c -- a
|
||||
}
|
||||
{{% /graphviz %}}
|
||||
|
||||
### Mermaid support
|
||||
|
||||
{{% mermaid %}}
|
||||
graph TD
|
||||
A[Enter Chart Definition] --> B(Preview)
|
||||
B --> C{decide}
|
||||
C --> D[Keep]
|
||||
C --> E[Edit Definition]
|
||||
E --> B
|
||||
D --> F[Save Image and Code]
|
||||
F --> B
|
||||
{{% /graphviz %}}
|
||||
|
||||
### Spoilers
|
||||
|
||||
{{% spoiler "Don't open me" %}}
|
|
@ -14,8 +14,8 @@ favorite: false
|
|||
---
|
||||
|
||||
Coming back from our last post about [generic flyweights in C++]({{< relref
|
||||
"../2020-07-16-generic-flyweight-cpp/index.md" >}}), we can write a flyweight
|
||||
that can be used with any abstract base classes.
|
||||
"generic-flyweight-cpp.md" >}}), we can write a flyweight that can be used with
|
||||
any abstract base classes.
|
||||
|
||||
<!--more-->
|
||||
|
||||
|
@ -68,7 +68,7 @@ public:
|
|||
const std::type_index lhs_i(lhs);
|
||||
const std::type_index rhs_i(rhs);
|
||||
if (lhs_i != rhs_i)
|
||||
return lhs_i < rhs_i;
|
||||
returh lhs_i < rhs_i;
|
||||
// We are now assured that both classes have the same type
|
||||
return less_than(rhs);
|
||||
}
|
|
@ -8,19 +8,7 @@ disable_feed: true
|
|||
A few of my Amazon wish lists in case you want to give me a gift.
|
||||
|
||||
* [Wish list](https://www.amazon.fr/hz/wishlist/ls/1FT0IO9JJTX57)
|
||||
* ~~[Board games](https://www.amazon.fr/hz/wishlist/ls/2NY50W36THGMW)~~
|
||||
* See the [Board Game Geek list](https://boardgamegeek.com/wishlist/Ambroisie)
|
||||
which is better curated and more up-to-date
|
||||
* [Board games](https://www.amazon.fr/hz/wishlist/ls/2NY50W36THGMW)
|
||||
* [Cooking & kitchenware](https://www.amazon.fr/hz/wishlist/ls/2MNRCLPNABZSU)
|
||||
* [Data Structures & Algorithms](https://www.amazon.fr/hz/wishlist/ls/2XZPQSBOGOFC3)
|
||||
* [Programming](https://www.amazon.fr/hz/wishlist/ls/1R4KFV4H2D8IF)
|
||||
* Other items:
|
||||
* [Chef's presses](https://www.thechefspress.com/shop)
|
||||
* [UK link](https://www.kitchenprovisions.co.uk/products/the-chefs-press-8oz-13oz)
|
||||
* [FR link](https://www.thekitchenlab.fr/fr/p/la-presse-du-chef-poids-de-friture-bruce-hill-13-oz/)
|
||||
* [Combustion Inc thermometer and
|
||||
display](https://combustion.inc/products/predictive-thermometer-gen2-wifi-display)
|
||||
* [Get the one with the range extender if you *really* want to spoil
|
||||
me](https://combustion.inc/products/predictive-thermometer-gen2-wifi-booster-wifi-display)
|
||||
* [Cannelés
|
||||
molds](https://www.laboetgato.fr/en/moules-a-canneles/13964-mould-for-canneles-non-polished-copper-o-45-mm-3333331010026.html)
|
||||
|
|
83
flake.lock
83
flake.lock
|
@ -1,68 +1,28 @@
|
|||
{
|
||||
"nodes": {
|
||||
"flake-compat": {
|
||||
"flake": false,
|
||||
"locked": {
|
||||
"lastModified": 1696426674,
|
||||
"narHash": "sha256-kvjfFW7WAETZlt09AgDn1MrtKzP7t90Vf7vypd3OL1U=",
|
||||
"owner": "edolstra",
|
||||
"repo": "flake-compat",
|
||||
"rev": "0f9255e01c2351cc7d116c072cb317785dd33b33",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "edolstra",
|
||||
"repo": "flake-compat",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"futils": {
|
||||
"inputs": {
|
||||
"systems": "systems"
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1710146030,
|
||||
"narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=",
|
||||
"lastModified": 1622445595,
|
||||
"narHash": "sha256-m+JRe6Wc5OZ/mKw2bB3+Tl0ZbtyxxxfnAWln8Q5qs+Y=",
|
||||
"owner": "numtide",
|
||||
"repo": "flake-utils",
|
||||
"rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a",
|
||||
"rev": "7d706970d94bc5559077eb1a6600afddcd25a7c8",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "numtide",
|
||||
"ref": "main",
|
||||
"ref": "master",
|
||||
"repo": "flake-utils",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"gitignore": {
|
||||
"inputs": {
|
||||
"nixpkgs": [
|
||||
"pre-commit-hooks",
|
||||
"nixpkgs"
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1709087332,
|
||||
"narHash": "sha256-HG2cCnktfHsKV0s4XW83gU3F57gaTljL9KNSuG6bnQs=",
|
||||
"owner": "hercules-ci",
|
||||
"repo": "gitignore.nix",
|
||||
"rev": "637db329424fd7e46cf4185293b9cc8c88c95394",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "hercules-ci",
|
||||
"repo": "gitignore.nix",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"nixpkgs": {
|
||||
"locked": {
|
||||
"lastModified": 1722415718,
|
||||
"narHash": "sha256-5US0/pgxbMksF92k1+eOa8arJTJiPvsdZj9Dl+vJkM4=",
|
||||
"lastModified": 1628320020,
|
||||
"narHash": "sha256-4xBEb+TOHyIGpK37EVsZx6dGPwNMf5YWNBJaQ4VyZws=",
|
||||
"owner": "NixOS",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "c3392ad349a5227f4a3464dce87bcc5046692fce",
|
||||
"rev": "67c80531be622641b5b2ccc3a7aff355cb02476b",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
|
@ -74,21 +34,19 @@
|
|||
},
|
||||
"pre-commit-hooks": {
|
||||
"inputs": {
|
||||
"flake-compat": "flake-compat",
|
||||
"gitignore": "gitignore",
|
||||
"nixpkgs": [
|
||||
"nixpkgs"
|
||||
"flake-utils": [
|
||||
"futils"
|
||||
],
|
||||
"nixpkgs-stable": [
|
||||
"nixpkgs": [
|
||||
"nixpkgs"
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1721042469,
|
||||
"narHash": "sha256-6FPUl7HVtvRHCCBQne7Ylp4p+dpP3P/OYuzjztZ4s70=",
|
||||
"lastModified": 1621411868,
|
||||
"narHash": "sha256-R+7OQ2JYFCb3E7Jl7LhRifzMVCR6Gl8R98zYsNhZtJ8=",
|
||||
"owner": "cachix",
|
||||
"repo": "pre-commit-hooks.nix",
|
||||
"rev": "f451c19376071a90d8c58ab1a953c6e9840527fd",
|
||||
"rev": "2e7fac06108b4fc81f5ff9ed9a02bc4f6ede7001",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
|
@ -104,21 +62,6 @@
|
|||
"nixpkgs": "nixpkgs",
|
||||
"pre-commit-hooks": "pre-commit-hooks"
|
||||
}
|
||||
},
|
||||
"systems": {
|
||||
"locked": {
|
||||
"lastModified": 1681028828,
|
||||
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
|
||||
"owner": "nix-systems",
|
||||
"repo": "default",
|
||||
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "nix-systems",
|
||||
"repo": "default",
|
||||
"type": "github"
|
||||
}
|
||||
}
|
||||
},
|
||||
"root": "root",
|
||||
|
|
20
flake.nix
20
flake.nix
|
@ -6,7 +6,7 @@
|
|||
type = "github";
|
||||
owner = "numtide";
|
||||
repo = "flake-utils";
|
||||
ref = "main";
|
||||
ref = "master";
|
||||
};
|
||||
|
||||
nixpkgs = {
|
||||
|
@ -22,8 +22,8 @@
|
|||
repo = "pre-commit-hooks.nix";
|
||||
ref = "master";
|
||||
inputs = {
|
||||
flake-utils.follows = "futils";
|
||||
nixpkgs.follows = "nixpkgs";
|
||||
nixpkgs-stable.follows = "nixpkgs";
|
||||
};
|
||||
};
|
||||
};
|
||||
|
@ -61,17 +61,15 @@
|
|||
};
|
||||
};
|
||||
|
||||
devShells = {
|
||||
default = pkgs.mkShell {
|
||||
name = "blog";
|
||||
devShell = pkgs.mkShell {
|
||||
name = "blog";
|
||||
|
||||
buildInputs = with pkgs; [
|
||||
gnumake
|
||||
hugo
|
||||
];
|
||||
buildInputs = with pkgs; [
|
||||
gnumake
|
||||
hugo
|
||||
];
|
||||
|
||||
inherit (self.checks.${system}.pre-commit) shellHook;
|
||||
};
|
||||
inherit (self.checks.${system}.pre-commit) shellHook;
|
||||
};
|
||||
}
|
||||
);
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
series:
|
||||
other: "series"
|
||||
serie:
|
||||
other: "serie"
|
||||
|
||||
Series:
|
||||
other: "Series"
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
series:
|
||||
serie:
|
||||
other: "série"
|
||||
|
||||
Series:
|
||||
|
|
|
@ -11,7 +11,6 @@
|
|||
<a data-hint="Sourcehut" title="Sourcehut" href="https://sr.ht/~{{ .Site.Author.sourcehut }}" target="_blank" rel="me"> Sourcehut </a>
|
||||
<a data-hint="LinkedIn" title="LinkedIn" href="https://www.linkedin.com/in/{{ .Site.Author.linkedin }}" target="_blank" rel="me"> LinkedIn </a>
|
||||
<a data-hint="Matrix" title="Matrix" href="https://matrix.to/#/{{ .Site.Author.matrix }}" target="_blank" rel="me"> Matrix </a>
|
||||
<a data-hint="Mastodon" title="Mastodon" href="https://{{ .Site.Author.mastodon }}" target="_blank" rel="me"> Mastodon </a>
|
||||
<a rel="pgpkey" href="https://key.belanyi.fr/key.pgp"> PGP </a>
|
||||
<link rel="authorization_endpoint" href="https://indieauth.com/auth">
|
||||
<p>
|
||||
|
|
|
@ -3,30 +3,6 @@
|
|||
<link rel="stylesheet" type="text/css" href="https://tikzjax.com/v1/fonts.css">
|
||||
<script async src="https://tikzjax.com/v1/tikzjax.js"></script>
|
||||
{{ end }}
|
||||
<!-- Graphviz support -->
|
||||
{{ if (.Params.graphviz) }}
|
||||
<script src="https://cdn.jsdelivr.net/npm/@viz-js/viz@3.7.0/lib/viz-standalone.min.js"></script>
|
||||
<script type="text/javascript">
|
||||
(function() {
|
||||
Viz.instance().then(function(viz) {
|
||||
Array.prototype.forEach.call(document.querySelectorAll("pre.graphviz"), function(x) {
|
||||
var svg = viz.renderSVGElement(x.innerText);
|
||||
// Let CSS take care of the SVG size
|
||||
svg.removeAttribute("width")
|
||||
svg.setAttribute("height", "auto")
|
||||
x.replaceChildren(svg)
|
||||
})
|
||||
})
|
||||
})();
|
||||
</script>
|
||||
{{ end }}
|
||||
<!-- Mermaid support -->
|
||||
{{ if (.Params.mermaid) }}
|
||||
<script type="module" async>
|
||||
import mermaid from "https://cdn.jsdelivr.net/npm/mermaid@latest/dist/mermaid.esm.min.mjs";
|
||||
mermaid.initialize({ startOnLoad: true });
|
||||
</script>
|
||||
{{ end }}
|
||||
{{ with .OutputFormats.Get "atom" -}}
|
||||
{{ printf `<link rel="%s" type="%s" href="%s" title="%s" />` .Rel .MediaType.Type .Permalink $.Site.Title | safeHTML }}
|
||||
{{ end -}}
|
||||
|
|
|
@ -1,16 +0,0 @@
|
|||
<pre class="graphviz">
|
||||
{{ with .Get "file" }}
|
||||
{{ if eq (. | printf "%.1s") "/" }}
|
||||
{{/* Absolute path are from root of site. */}}
|
||||
{{ $.Scratch.Set "filepath" . }}
|
||||
{{ else }}
|
||||
{{/* Relative paths are from page directory. */}}
|
||||
{{ $.Scratch.Set "filepath" $.Page.File.Dir }}
|
||||
{{ $.Scratch.Add "filepath" . }}
|
||||
{{ end }}
|
||||
|
||||
{{ $.Scratch.Get "filepath" | readFile }}
|
||||
{{ else }}
|
||||
{{.Inner}}
|
||||
{{ end }}
|
||||
</pre>
|
|
@ -1,16 +0,0 @@
|
|||
<pre class="mermaid">
|
||||
{{ with .Get "file" }}
|
||||
{{ if eq (. | printf "%.1s") "/" }}
|
||||
{{/* Absolute path are from root of site. */}}
|
||||
{{ $.Scratch.Set "filepath" . }}
|
||||
{{ else }}
|
||||
{{/* Relative paths are from page directory. */}}
|
||||
{{ $.Scratch.Set "filepath" $.Page.File.Dir }}
|
||||
{{ $.Scratch.Add "filepath" . }}
|
||||
{{ end }}
|
||||
|
||||
{{ $.Scratch.Get "filepath" | readFile }}
|
||||
{{ else }}
|
||||
{{.Inner}}
|
||||
{{ end }}
|
||||
</pre>
|
|
@ -1,16 +1,3 @@
|
|||
<script type="text/tikz">
|
||||
{{ with .Get "file" }}
|
||||
{{ if eq (. | printf "%.1s") "/" }}
|
||||
{{/* Absolute path are from root of site. */}}
|
||||
{{ $.Scratch.Set "filepath" . }}
|
||||
{{ else }}
|
||||
{{/* Relative paths are from page directory. */}}
|
||||
{{ $.Scratch.Set "filepath" $.Page.File.Dir }}
|
||||
{{ $.Scratch.Add "filepath" . }}
|
||||
{{ end }}
|
||||
|
||||
{{ $.Scratch.Get "filepath" | readFile }}
|
||||
{{ else }}
|
||||
{{.Inner}}
|
||||
{{ end }}
|
||||
{{.Inner}}
|
||||
</script>
|
||||
|
|
|
@ -1 +1 @@
|
|||
Subproject commit d77e0d6d552ad135138f1f9443ef30cc5af8f0f3
|
||||
Subproject commit 5dab60e04a37896c09a32137aefe821c63b3af04
|
|
@ -1 +1 @@
|
|||
Subproject commit d545effed9949bf834eaed09ad423ec3e030794f
|
||||
Subproject commit 5da913dc46d2dadcaf6548256238c58c504476de
|
Loading…
Reference in a new issue