Compare commits

..

1 commit

Author SHA1 Message Date
Bruno BELANYI ee916a506b build: makefile: add deploy step
Some checks reported errors
continuous-integration/drone/push Build encountered an error
2021-08-08 16:23:51 +02:00
39 changed files with 122 additions and 3447 deletions

64
.drone.jsonnet Normal file
View file

@ -0,0 +1,64 @@
# Build one Drone pipeline that lints, builds and deploys the blog.
#   isDev = true  -> "deploy-dev": every branch except "main", builds drafts
#                    and future posts against https://dev.belanyi.fr
#   isDev = false -> "deploy-prod": only "main", built with HUGO_ENV=production
local Pipeline(isDev) = {
  kind: "pipeline",
  name: if isDev then "deploy-dev" else "deploy-prod",
  # Dev ignores "main", prod only triggers on "main"
  trigger: { branch: { [if isDev then "exclude" else "include"]: [ "main" ] } },
  # We want to clone the submodules, which isn't done by default
  clone: { disable: true },
  steps: [
    {
      name: "clone",
      image: "plugins/git",
      # Plugin parameters must be nested under `settings` to reach the plugin
      settings: { recursive: true },
    },
    {
      name: "markdownlint",
      image: "06kellyjac/markdownlint-cli",
      commands: [
        "markdownlint --version",
        "markdownlint content/",
      ],
    },
    {
      name: "build",
      image: "klakegg/hugo",
      commands: [
        "hugo version",
        # If dev, include drafts and future articles, change base URL
        "hugo --minify" + (if isDev then " -D -F -b https://dev.belanyi.fr" else ""),
      ],
      # A null computed field name omits the field: only prod gets `environment`
      [if !isDev then "environment"]: { HUGO_ENV: "production" },
    },
    {
      name: "deploy",
      image: "appleboy/drone-scp",
      settings: {
        source: "public/*",
        strip_components: 1,  # Remove 'public/' prefix from file paths
        rm: true,  # Remove previous files from target directory
        host: { from_secret: "ssh_host" },
        # Dev deploys to the target stored in the "ssh_target_dev" secret
        target: { from_secret: "ssh_target" + (if isDev then "_dev" else "") },
        username: { from_secret: "ssh_user" },
        key: { from_secret: "ssh_key" },
        port: { from_secret: "ssh_port" },
      },
    },
    {
      name: "notify",
      image: "plugins/matrix",
      settings: {
        homeserver: { from_secret: "matrix_homeserver" },
        roomid: { from_secret: "matrix_roomid" },
        username: { from_secret: "matrix_username" },
        password: { from_secret: "matrix_password" },
      },
      # Step-level conditions use `when` (`trigger` is a pipeline-level key);
      # listing "failure" is what lets the notification run after a failed step
      when: { status: [ "failure", "success" ] },
    },
  ],
};

# Emit both pipelines: production first, then development
[
  Pipeline(false),
  Pipeline(true),
]

7
.envrc
View file

@ -1 +1,8 @@
# direnv hook: load the Nix flake development environment into the shell.
use_flake() {
# Register the flake inputs so changes to them are noticed
watch_file flake.nix
watch_file flake.lock
# Evaluate the shell code printed by `nix print-dev-env` in this shell
eval "$(nix print-dev-env)"
}
use flake
# Run the shell code held in $shellHooks (presumably set by the dev
# environment loaded above -- TODO confirm)
eval "$shellHooks"

View file

@ -1,3 +0,0 @@
# MD024/no-duplicate-heading/no-duplicate-header
MD024:
siblings_only: true

View file

@ -1,64 +0,0 @@
# CI pipeline: check the flake, build the site, deploy it, then notify.
labels:
backend: local
# Two matrix entries: one for the dev site and one for the prod site, each
# with its own make target and deployment-target secret name
matrix:
include:
- TYPE: dev
MAKE_TARGET: build-dev
SSH_TARGET: ssh_target_dev
- TYPE: prod
MAKE_TARGET: build-prod
SSH_TARGET: ssh_target
# Run the correct matrix build on the correct branch
# (prod runs only on the default branch, dev only on every other branch)
when:
evaluate: |
((CI_COMMIT_BRANCH == CI_REPO_DEFAULT_BRANCH) == ("${TYPE}" == "prod"))
steps:
- name: check
image: bash
commands:
- nix flake check
- name: build (${TYPE})
image: bash
commands:
# If dev, include drafts and future articles, change base URL
- nix develop -c make ${MAKE_TARGET}
- name: deploy (${TYPE})
image: bash
environment:
# Trailing slash to synchronize the folder's *content* to the target
SYNC_SOURCE: public/
SYNC_KEY:
from_secret: ssh_key
SYNC_PORT:
from_secret: ssh_port
# The secret name comes from the matrix entry selected above
SYNC_TARGET:
from_secret: ${SSH_TARGET}
SYNC_USERNAME:
from_secret: ssh_user
SYNC_HOST:
from_secret: ssh_host
commands:
- "nix run github:ambroisie/nix-config#drone-rsync"
- name: notify
image: bash
environment:
ADDRESS:
from_secret: matrix_homeserver
ROOM:
from_secret: matrix_roomid
USER:
from_secret: matrix_username
PASS:
from_secret: matrix_password
commands:
- nix run github:ambroisie/matrix-notifier
# Notify whether the previous steps failed or succeeded
when:
status:
- failure
- success

View file

@ -3,7 +3,7 @@ all: build-dev
.PHONY: build-dev
build-dev:
HUGO_TITLE="Ambroisie's dev blog" HUGO_BASEURL=https://dev.belanyi.fr hugo -D -F
HUGO_BASEURL=https://dev.belanyi.fr hugo -D -F
.PHONY: build-prod
build-prod:
@ -13,6 +13,11 @@ build-prod:
serve:
hugo server -D -F
# Deploy the generated `public/` directory to the remote host with rsync.
# Required environment: USERNAME, SSH_HOST, TARGET.
# Optional environment: KEY, a private key loaded into a temporary ssh-agent.
#
# The whole recipe is a single shell invocation: make runs each recipe line in
# its own shell, so an agent started on one line would be invisible to rsync
# on a later one.
.PHONY: deploy
deploy:
	@set -e; \
	if [ -z "$$USERNAME" ] || [ -z "$$SSH_HOST" ] || [ -z "$$TARGET" ]; then \
		echo "deploy: USERNAME, SSH_HOST and TARGET must be set" >&2; \
		exit 1; \
	fi; \
	if [ -n "$$KEY" ]; then \
		eval "$$(ssh-agent -s)" >/dev/null; \
		trap 'ssh-agent -k >/dev/null' EXIT; \
		echo "$$KEY" | ssh-add -; \
	fi; \
	rsync --progress -avz --delete public/ "$$USERNAME@$$SSH_HOST:$$TARGET"
.PHONY: clean
clean:
$(RM) -r public

View file

@ -5,18 +5,15 @@ draft: false # I don't care for draft mode, git has branches for that
description: ""
tags:
- accounting
- algorithms
- c++
- ci/cd
- cli
- data structures
- design-pattern
- docker
- drone
- git
- hugo
- nix
- python
- self-hosting
- test
categories:

View file

@ -6,6 +6,8 @@ theme:
- "hugo-atom-feed"
- "anubis"
paginate: 5
disqusShortname: ""
googleAnalytics: ""
enableRobotsTXT: true
enableEmoji: true
@ -35,12 +37,11 @@ menu:
author:
name: "Bruno BELANYI"
email: "contact-blog@belanyi.fr"
github: "ambroisie"
gitlab: "ambroisie"
github: "Ambroisie"
gitlab: "Ambroisie"
sourcehut: "ambroisie"
linkedin: "bruno-belanyi"
matrix: "@ambroisie:belanyi.fr"
mastodon: "nixos.paris/@ambroisie"
permalinks:
posts: /:year/:month/:day/:title/
@ -65,18 +66,11 @@ params:
webmentions:
login: belanyi.fr
pingback: true
mathjax: true
services:
disqus:
shortname: ""
googleAnalytics:
ID: ""
taxonomies:
category: "categories"
tag: "tags"
series: "series"
serie: "series"
markup:
goldmark:

View file

@ -4,13 +4,7 @@ description: "About me"
date: 2020-07-14
---
I'm currently working as a Senior Software Engineer at [Google][google], as part
of their Embedded Graphics Drivers team for Pixel devices.
[google]: https://www.linkedin.com/company/google/mycompany/verification/
I'm a CS student at EPITA.
You can find my CV [here](https://cv.belanyi.fr/en.pdf). Or
[here](https://cv.belanyi.fr/fr.pdf) for the French version.
If you are a friend or family, you might be looking for my [wish
lists]({{< ref "wish-lists.md" >}}) to find some present ideas.

View file

@ -1,172 +0,0 @@
---
title: "Magic Conversions in C++"
date: 2021-10-01T14:46:14+02:00
draft: false # I don't care for draft mode, git has branches for that
description: "How to get the compiler to infer the correct conversion"
tags:
- c++
- design-pattern
categories:
- programming
series:
favorite: false
disable_feed: false
---
One feature that I like a lot in [Rust][rust-lang] is return type polymorphism,
best exemplified with the following snippet of code:
```rust
use std::collections::HashSet;
fn main() {
let vec: Vec<_> = (0..10).filter(|a| a % 2 == 0).collect();
let set: HashSet<_> = (0..10).filter(|a| a % 2 == 0).collect();
println!("vec: {:?}", vec);
println!("set: {:?}", set);
}
```
We have the same expression (`(0..10).filter(|a| a % 2 == 0).collect()`) that
results in two totally different types of values (a `Vec` and a `HashSet`)!
This is because Rust allows you to write a function which is generic in its
*return type*, which is a super-power that C++ does not have. But is there a way
to emulate this behaviour with some clever code?
[rust-lang]: https://rust-lang.org/
<!--more-->
## The problem
For the purposes of this article, the problem that I am trying to solve will be
the following:
```c++
void takes_small_array(std::array<char, 32> arr);
void takes_big_array(std::array<char, 4096> arr);
// How to define a `to_array` function so that the following works?
void test(std::string_view s) {
takes_small_array(to_array(s));
takes_big_array(to_array(s));
}
```
## First attempt
If we try to solve this in a way similar to Rust, we hit a problem in what the
language allows us to write:
```c++
std::array<char, 32> to_array(std::string_view s) {
std::array<char, 32> ret;
std::copy(s.begin(), s.end(), ret.begin());
return ret;
}
std::array<char, 4096> to_array(std::string_view s) {
std::array<char, 4096> ret;
std::copy(s.begin(), s.end(), ret.begin());
return ret;
}
```
The compiler complains with the following error:
```none
ambiguating new declaration of 'std::array<char, 4096> to_array(std::string_view)'
note: old declaration 'std::array<char, 32> to_array(std::string_view)'
```
That is because C++ does **not** allow you to write an overload set based on
*return type only*.
## Using templates
For our second try, we want to use *non-type template parameters* to solve the
issue. We write the following:
```c++
template <size_t N>
std::array<char, N> to_array(std::string_view s) {
std::array<char, N> ret;
std::copy(s.begin(), s.end(), ret.begin());
return ret;
}
```
The compiler does not complain when we write this! We have also solved two minor
issues with the previous try: the size of the array is no longer hard-coded, and we
kept the code DRY.
However we have some trouble trying to use those functions as stated in the
beginning of the problem, with the following error message:
```none
error: no matching function for call to 'to_array(std::string_view&)'
| takes_small_array(to_array(s));
note: candidate: 'template<size_t N> std::array<char, N> to_array(std::string_view)'
| std::array<char, N> to_array(std::string_view s) {
note: template argument deduction/substitution failed:
note: couldn't deduce template parameter 'N'
```
The compiler cannot deduce the size of the array we want to use! We could solve
the issue by explicitly giving a size when calling the function
(`to_array<32>(s)`), however this is unsatisfactory: we are not solving the
problem as stated initially, which could for example lead to needless churn
if we change the signature of `takes_small_array` to instead use
`std::array<char, 64>`.
Thankfully there is a way to use the compiler to our advantage, and have it
deduce it for us, but it involves some trickery.
## The solution
We want to write a function that resolves the previous two issues we
experienced:
* The non-type template parameter must be deduced by the end of the call to
`to_array`, but we can only deduce it once it is being consumed by
`takes_{small,big}_array` -- which is too late for the compiler.
* We cannot overload on the return type, which means we must return a single
type from the function.
The goal is to delay *when* the deduction of the array's size is happening,
which can be done by using a *templated conversion operator*.
So the solution to our problem is to do the following:
```c++
// Proxy returned by `to_array`: it remembers the source string and defers the
// choice of array size until the value is converted to a concrete
// `std::array<char, N>`.
class ToArray {
    std::string_view s_;

public:
    ToArray(std::string_view s) : s_(s) {}

    // Templated conversion operator: `N` is deduced from the type the caller
    // converts to. No bounds check is done, the caller must ensure that the
    // string fits in `N` characters.
    template <std::size_t N>
    operator std::array<char, N>() const {
        std::array<char, N> ret;
        std::copy(s_.begin(), s_.end(), ret.begin());
        return ret;
    }
};

// Single, non-template entry point: returns the proxy instead of an array.
ToArray to_array(std::string_view s) {
    return ToArray{s};
}
```
The following steps happen when trying to call `takes_small_array(to_array(s))`:
* `to_array(s)` returns a `ToArray` value.
* the `ToArray` value is not an `array<char, 32>`, but has an implicit
conversion operator, which the compiler invokes.
* `takes_small_array` is called with the converted `array<char, 32>` value.
We now have a "magic" function which can convert a `string_view` to an
`std::array` of characters of any size. We could further improve this by
ensuring that the array is terminated with a `'\0'`, throwing an exception when
the array is too small for the given string, etc... This is left as an exercise
to the reader.

View file

@ -1,329 +0,0 @@
---
title: "Multiple Dispatch in C++"
date: 2022-11-02T16:36:53+01:00
draft: false # I don't care for draft mode, git has branches for that
description: "A Lisp super-power in C++"
tags:
- c++
- design-pattern
categories:
- programming
series:
favorite: false
disable_feed: false
---
A great feature that can be used in more dynamic languages is *multiple
dispatch*. Here's an example in [Julia][julia-lang] taken from the [Wikipedia
article][wiki-multiple-dispatch].
```julia
abstract type SpaceObject end
struct Asteroid <: SpaceObject
# Asteroid fields
end
struct Spaceship <: SpaceObject
# Spaceship fields
end
collide_with(::Asteroid, ::Spaceship) = # Asteroid/Spaceship collision
collide_with(::Spaceship, ::Asteroid) = # Spaceship/Asteroid collision
collide_with(::Spaceship, ::Spaceship) = # Spaceship/Spaceship collision
collide_with(::Asteroid, ::Asteroid) = # Asteroid/Asteroid collision
collide(x::SpaceObject, y::SpaceObject) = collide_with(x, y)
```
The `collide` function calls `collide_with` which, at runtime, will inspect the
types of its arguments and *dispatch* to the appropriate implementation.
Julia was created with multiple dispatch as a first-class citizen, it is used
liberally in its ecosystem. C++ does not have access to such a feature natively,
but there are alternatives that I will be presenting in this article, and try to
justify their uses and limitations.
[julia-lang]: https://julialang.org/
[wiki-multiple-dispatch]: https://en.wikipedia.org/wiki/Multiple_dispatch
<!--more-->
## Single dispatch
The native way to perform dynamic dispatch in C++ is through the
use of *virtual methods*, which allows an object to *override* the behaviour of
one of its super-classes' method.
Invoking a virtual method will perform *single dispatch*, on the dynamic type
of the object whose method is being called.
Here is an example:
```cpp
struct SpaceObject {
virtual ~SpaceObject() = default;
// Pure virtual method, which must be overridden by non-abstract sub-classes
virtual void impact() = 0;
};
struct Asteroid : SpaceObject {
// Override the method for asteroid impacts
void impact() override {
std::cout << "Bang!\n";
}
};
struct Spaceship : SpaceObject {
// Override the method for spaceship impacts
void impact() override {
std::cout << "Crash!\n";
}
};
int main() {
std::unique_ptr<SpaceObject> object = std::make_unique<Spaceship>();
object->impact(); // Prints "Crash!"
object = std::make_unique<Asteroid>();
object->impact(); // Prints "Bang!"
}
```
Virtual methods are great when you want to represent a common set of behaviour
(an *interface*), and be able to substitute various types with their specific
implementation.
For example, a dummy file-system interface might look like the following:
```cpp
// Minimal file-system interface; concrete back-ends override every operation.
struct Filesystem {
    // Polymorphic base: allow destruction through a `Filesystem` pointer
    virtual ~Filesystem() = default;

    // Store `data` under `filename`
    virtual void write(std::string_view filename, std::span<char> data) = 0;
    // Return the content stored under `filename`
    virtual std::vector<char> read(std::string_view filename) = 0;
    // Delete `filename`; named `remove` because `delete` is a reserved keyword
    virtual void remove(std::string_view filename) = 0;
};
```
You can then write `PosixFilesystem` which makes use of the POSIX API and
interact with actual on-disk data, `MockFilesystem` which only works in-memory
and can be used for testing, etc...
## Double dispatch through the Visitor pattern
Sometimes single dispatch is not enough, such as in the collision example at the
beginning of this article. In cases where a computation depends on the dynamic
type of *two* of its values, we can make use of double-dispatch by leveraging
the Visitor design pattern. This is done by calling a virtual method on the
first value, which itself will call a virtual method on the second value.
Here's a commentated example:
```cpp
struct Asteroid;
struct Spaceship;
struct SpaceObject {
virtual ~SpaceObject() = default;
// Only used to kick-start the double-dispatch process
virtual void collide_with(SpaceObject& other) = 0;
// The actual dispatching methods
virtual void collide_with(Asteroid& other) = 0;
virtual void collide_with(Spaceship& other) = 0;
};
struct Asteroid : SpaceObject {
void collide_with(SpaceObject& other) override {
// `*this` is an `Asteroid&` which kick-starts the double-dispatch
other.collide_with(*this);
};
void collide_with(Asteroid& other) override { /* Asteroid/Asteroid */ };
void collide_with(Spaceship& other) override { /* Asteroid/Spaceship */ };
};
struct Spaceship : SpaceObject {
void collide_with(SpaceObject& other) override {
// `*this` is a `Spaceship&` which kick-starts the double-dispatch
other.collide_with(*this);
};
void collide_with(Asteroid& other) override { /* Spaceship/Asteroid */ };
void collide_with(Spaceship& other) override { /* Spaceship/Spaceship */ };
};
void collide(SpaceObject& first, SpaceObject& second) {
first.collide_with(second);
};
int main() {
auto asteroid = std::make_unique<Asteroid>();
auto spaceship = std::make_unique<Spaceship>();
collide(*asteroid, *spaceship);
// Calls in order:
// - Asteroid::collide_with(SpaceObject&)
// - Spaceship::collide_with(Asteroid&)
collide(*spaceship, *asteroid);
// Calls in order:
// - Spaceship::collide_with(SpaceObject&)
// - Asteroid::collide_with(Spaceship&)
asteroid->collide_with(*spaceship);
// Only calls Asteroid::collide_with(Spaceship&)
spaceship->collide_with(*asteroid);
// Only calls Spaceship::collide_with(Asteroid&)
}
```
Double dispatch is most commonly used with the *visitor pattern*, in
which a closed class hierarchy (the data) is separated from an open class
hierarchy (the algorithms acting on that data). This is especially useful in
e.g: compilers, where the AST class hierarchy represents the data *only*, and
all compiler stages and optimization passes are programmed by a series of
visitors.
One downside of this approach is that if you want to add `SpaceStation` as
a sub-class of `SpaceObject`, and handle its collisions with other
`SpaceObject`s, you need to:
* Implement all `collide_with` methods for this new class.
* Add a new virtual method `collide_with(SpaceStation&)` and implement it on
every sub-class.
This can be inconvenient if your class hierarchy changes often.
## Multiple dispatch on a closed class hierarchy
When even double dispatch is not enough, there is a way to do multiple dispatch
in standard C++, included in the STL since C++17. However unlike the previous
methods I showed, this one relies on using [`std::variant`][variant-cppref] and
[`std::visit`][visit-cppref].
[variant-cppref]: https://en.cppreference.com/w/cpp/utility/variant
[visit-cppref]: https://en.cppreference.com/w/cpp/utility/variant/visit
The limitation of `std::variant` is that you are limited to the types you can
select at *compile-time* for the values used during your dispatch operation.
You have a *closed* hierarchy of classes, which is the explicit list of types in
your `variant`.
Nonetheless, if you can live with that limitation, then you have a great amount
of power available to you. I have used `std::visit` in the past to mimic the
effect of pattern matching.
In this example, I re-create the double-dispatch from the previous section:
```cpp
// No need to inherit from a `SpaceObject` base class
struct Asteroid {};
struct Spaceship {};
// But the list of possible runtime types *must* be enumerated at compile-time
using SpaceObject = std::variant<Asteroid, Spaceship>;
void collide(SpaceObject& first, SpaceObject& second) {
struct CollideDispatch {
void operator()(Asteroid& first, Asteroid& second) {
// Asteroid/Asteroid
}
void operator()(Asteroid& first, Spaceship& second) {
// Asteroid/Spaceship
}
void operator()(Spaceship& first, Asteroid& second) {
// Spaceship/Asteroid
}
void operator()(Spaceship& first, Spaceship& second) {
// Spaceship/Spaceship
}
};
std::visit(CollideDispatch(), first, second);
}
int main() {
SpaceObject asteroid = Asteroid();
SpaceObject spaceship = Spaceship();
collide(asteroid, spaceship);
// Calls CollideDispatch::operator()(Asteroid&, Spaceship&)
collide(spaceship, asteroid);
// Calls CollideDispatch::operator()(Spaceship&, Asteroid&)
}
```
Obviously, the issue with adding a new `SpaceStation` variant is once again
apparent in this implementation. You will get a compile error unless you handle
this new `SpaceStation` variant at every point you `visit` the `SpaceObject`s.
## The Expression Problem
One issue we have not been able to move past in these examples is the
[Expression Problem][expression-problem]. In two words, this means that we can't
add a new data type (e.g: `SpaceStation`), or a new operation (e.g: `land_on`)
to our current code without re-compiling it.
[expression-problem]: https://en.wikipedia.org/wiki/Expression_problem
This is the downside I was pointing out in our previous sections:
* Data type extension: one can easily add a new `SpaceObject` child-class in the
OOP version, but needs to modify each implementation if we want to add a new
method to the `SpaceObject` interface to implement a new operation.
* Operation extension: one can easily create a new function when using the
`std::variant` based representation, as pattern-matching easily allows us to
only handle the kinds of values we are interested in. But adding a new
`SpaceObject` variant means we need to modify and re-compile every
`std::visit` call to handle the new variant.
There is currently no (good) way in standard C++ to tackle the Expression
Problem. A paper ([N2216][N2216]) was written to propose a new language feature
to improve the situation. However it looks quite complex, and never got followed
up on for standardization.
[N2216]: https://open-std.org/jtc1/sc22/wg21/docs/papers/2007/n2216.pdf
In the meantime, one can find some libraries (like [`yomm2`][yomm2]) that
reduce the amount of boiler-plate needed to emulate this feature.
[yomm2]: https://github.com/jll63/yomm2
```cpp
#include <yorel/yomm2/keywords.hpp>
struct SpaceObject {
virtual ~SpaceObject() = default;
};
struct Asteroid : SpaceObject { /* fields, methods, etc... */ };
struct Spaceship : SpaceObject { /* fields, methods, etc... */ };
// Register all sub-classes of `SpaceObject` for use with open methods
register_classes(SpaceObject, Asteroid, Spaceship);
// Register the `collide` open method, which dispatches on two arguments
declare_method(void, collide, (virtual_<SpaceObject&>, virtual_<SpaceObject&>));
// Write the different implementations of `collide`
define_method(void, collide, (Asteroid& left, Asteroid& right)) { /* work */ }
define_method(void, collide, (Asteroid& left, Spaceship& right)) { /* work */ }
define_method(void, collide, (Spaceship& left, Asteroid& right)) { /* work */ }
define_method(void, collide, (Spaceship& left, Spaceship& right)) { /* work */ }
int main() {
yorel::yomm2::update_methods();
auto asteroid = std::make_unique<Asteroid>();
auto spaceship = std::make_unique<Spaceship>();
collide(*asteroid, *spaceship); // Calls (Asteroid, Spaceship) version
collide(*spaceship, *asteroid); // Calls (Spaceship, Asteroid) version
collide(*asteroid, *asteroid); // Calls (Asteroid, Asteroid) version
collide(*spaceship, *spaceship); // Calls (Spaceship, Spaceship) version
}
```

View file

@ -1,157 +0,0 @@
---
title: "Union Find"
date: 2024-06-24T21:07:49+01:00
draft: false # I don't care for draft mode, git has branches for that
description: "My favorite data structure"
tags:
- algorithms
- data structures
- python
categories:
- programming
series:
- Cool algorithms
favorite: false
disable_feed: false
---
To kick off the [series]({{< ref "/series/cool-algorithms/" >}}) of posts about
algorithms and data structures I find interesting, I will be talking about my
favorite one: the [_Disjoint Set_][wiki]. Also known as the _Union-Find_ data
structure, so named because of its two main operations: `ds.union(lhs, rhs)` and
`ds.find(elem)`.
[wiki]: https://en.wikipedia.org/wiki/Disjoint-set_data_structure
<!--more-->
## What does it do?
The _Union-Find_ data structure allows one to store a collection of sets of
elements, with operations for adding new sets, merging two sets into one, and
finding the representative member of a set. Not only does it do all that, but it
does it in almost constant (amortized) time!
Here is a small motivating example for using the _Disjoint Set_ data structure:
```python
def connected_components(graph: Graph) -> list[set[Node]]:
# Initialize the disjoint set so that each node is in its own set
ds: DisjointSet[Node] = DisjointSet(graph.nodes)
# Each edge is a connection, merge both sides into the same set
for (start, dest) in graph.edges:
ds.union(start, dest)
# Connected components share the same (arbitrary) root
components: dict[Node, set[Node]] = defaultdict(set)
for n in graph.nodes:
components[ds.find(n)].add(n)
# Return a list of disjoint sets corresponding to each connected component
return list(components.values())
```
## Implementation
I will show how to implement `UnionFind` for integers, though it can easily be
extended to be used with arbitrary types (e.g: by mapping each element
one-to-one to a distinct integer, or using a different set representation).
### Representation
Creating a new disjoint set is easy enough:
```python
class UnionFind:
    """Disjoint-set forest over the integers ``0 .. size - 1``."""

    # _parent[i] is the parent of element i; a root is its own parent
    _parent: list[int]
    # _rank[i] is an upper bound on the height of the tree rooted at i
    _rank: list[int]

    def __init__(self, size: int):
        """Create `size` singleton sets, one per integer in ``range(size)``."""
        # Each node is in its own set, making it its own parent...
        self._parent = list(range(size))
        # ... And its rank 0
        self._rank = [0] * size
```
We represent each set through the `_parent` field: each element of the set is
linked to its parent, until the root node which is its own parent. When first
initializing the structure, each element is in its own set, so we initialize
each element to be a root and make it its own parent (`_parent[i] == i` for all
`i`).
The `_rank` field is an optimization which we will touch on in a later section.
### Find
A naive implementation of `find(...)` is simple enough to write:
```python
def find(self, elem: int) -> int:
# If `elem` is its own parent, then it is the root of the tree
if (parent := self._parent[elem]) == elem:
return elem
# Otherwise, recurse on the parent
return self.find(parent)
```
However, going back up the chain of parents each time we want to find the root
node (an `O(n)` operation) would make for disastrous performance. Instead we can
do a small optimization called _path splitting_.
```python
def find(self, elem: int) -> int:
    """Return the root of `elem`'s set, splitting the path on the way up."""
    while (parent := self._parent[elem]) != elem:
        # Replace each parent link by a link to the grand-parent.
        # The subscript target must come first: targets are assigned left to
        # right, so rebinding `elem` first would update the wrong slot.
        self._parent[elem], elem = self._parent[parent], parent
    return elem
```
This flattens the chain so that each node links more directly to the root (the
length is reduced by half), making each subsequent `find(...)` faster.
Other compression schemes exist, along the spectrum between shortening
the chain faster and earlier, or updating `_parent` fewer times per `find(...)`.
### Union
A naive implementation of `union(...)` is simple enough to write:
```python
def union(self, lhs: int, rhs: int) -> int:
# Replace both element by their root parent
lhs = self.find(lhs)
rhs = self.find(rhs)
# arbitrarily merge one into the other
self._parent[rhs] = lhs
# Return the new root
return lhs
```
Once again, improvements can be made. Depending on the order in which we call
`union(...)`, we might end up creating a long chain from the leaf of the tree to
the root node, leading to slower `find(...)` operations. If at all possible, we
would like to keep the trees as shallow as possible.
To do so, we want to avoid merging taller trees into smaller ones, so as to keep
them as balanced as possible. Since a higher tree will result in a slower
`find(...)`, keeping the trees balanced will lead to increased performance.
This is where the `_rank` field we mentioned earlier comes in: the _rank_ of an
element is an upper bound on its height in the tree. By keeping track of this
_approximate_ height, we can keep the trees balanced when merging them.
```python
def union(self, lhs: int, rhs: int) -> int:
    """Merge the sets containing `lhs` and `rhs`, returning the merged root."""
    lhs = self.find(lhs)
    rhs = self.find(rhs)
    # Bail out early if they already belong to the same set
    if lhs == rhs:
        return lhs
    # Always keep `lhs` as the taller tree
    if self._rank[lhs] < self._rank[rhs]:
        lhs, rhs = rhs, lhs
    # Merge the smaller tree into the taller one
    self._parent[rhs] = lhs
    # Update the rank when merging trees of approximately the same size
    if self._rank[lhs] == self._rank[rhs]:
        self._rank[lhs] += 1
    return lhs
```

View file

@ -1,171 +0,0 @@
---
title: "Trie"
date: 2024-06-30T11:07:49+01:00
draft: false # I don't care for draft mode, git has branches for that
description: "A cool map"
tags:
- algorithms
- data structures
- python
categories:
- programming
series:
- Cool algorithms
favorite: false
disable_feed: false
---
This time, let's talk about the [_Trie_][wiki], which is a tree-based mapping
structure most often used for string keys.
[wiki]: https://en.wikipedia.org/wiki/Trie
<!--more-->
## What does it do?
A _Trie_ can be used to map a set of string keys to their corresponding values,
without the need for a hash function. This also means you won't suffer from hash
collisions, though the tree-based structure will probably translate to slower
performance than a good hash table.
A _Trie_ is especially useful to represent a dictionary of words in the case of
spell correction, as it can easily be used to fuzzy match words under a given
edit distance (think [Levenshtein distance]).
[Levenshtein distance]: https://en.wikipedia.org/wiki/Levenshtein_distance
## Implementation
This implementation will be in Python for exposition purposes, even though
it already has a built-in `dict`.
### Representation
Creating a new `Trie` is easy: the root node starts off empty and without any
mapped values.
```python
class Trie[T]:
_children: dict[str, Trie[T]]
_value: T | None
def __init__(self):
# Each letter is mapped to a Trie
self._children = defaultdict(Trie)
# If we match a full string, we store the mapped value
self._value = None
```
We're using a `defaultdict` for the children for ease of implementation in this
post. In reality, I would encourage you to exit early when you can't match a given
character.
The string key will be implicit by the position of a node in the tree: the empty
string at the root, one-character strings as its direct children, etc...
### Search
An exact match look-up is easily done: we go down the tree until we've exhausted
the key. At that point we've either found a mapped value or not.
```python
def get(self, key: str) -> T | None:
# Exact look-up: consume one character of `key` per level of the tree
# Have we matched the full key?
if not key:
# Return the `T` if mapped, `None` otherwise
return self._value
# Otherwise, recurse on the child corresponding to the first letter
return self._children[key[0]].get(key[1:])
```
### Insertion
Adding a new value to the _Trie_ is similar to a key lookup, only this time we
store the new value instead of returning it.
```python
def insert(self, key: str, value: T) -> bool:
    """Map `key` to `value`; return True if a previous mapping was overwritten."""
    # Have we matched the full key?
    if not key:
        # Check whether we're overwriting a previous mapping
        was_mapped = self._value is not None
        # Store the corresponding value
        self._value = value
        # Return whether we've performed an overwrite
        return was_mapped
    # Otherwise, recurse on the child corresponding to the first letter
    return self._children[key[0]].insert(key[1:], value)
```
### Removal
Removal should also look familiar.
```python
def remove(self, key: str) -> bool:
    """Unmap `key`; return True if it was mapped to a value beforehand."""
    # Have we matched the full key?
    if not key:
        was_mapped = self._value is not None
        # Remove the value
        self._value = None
        # Return whether it was mapped
        return was_mapped
    # Otherwise, recurse on the child corresponding to the first letter
    return self._children[key[0]].remove(key[1:])
```
### Fuzzy matching
Fuzzily matching a given word is where the real difficulty is: the key is to
realize we can use the prefix-tree nature of a _Trie_ to avoid doing wasteful
work.
By leveraging the prefix visit order of the tree, we can build an iterative
Levenshtein distance matrix, in much the same way one would do so in its
[Dynamic Programming] implementation (see the [Wagner-Fischer algorithm]).
[Dynamic Programming]: https://en.wikipedia.org/wiki/Dynamic_programming
[Wagner-Fischer algorithm]: https://en.wikipedia.org/wiki/Wagner%E2%80%93Fischer_algorithm
```python
class FuzzyResult[T](NamedTuple):
distance: int
key: str
value: T
def get_fuzzy(self, key: str, max_distance: int = 0) -> Iterator[FuzzyResult[T]]:
    """Yield every mapping whose key is within `max_distance` edits of `key`.

    Each result carries the edit distance, the matched key and its value.
    The tree is walked in prefix order, computing one row of the Levenshtein
    matrix per visited node.
    """

    def helper(
        current_word: str,
        node: Trie[T],
        previous_row: list[int],
    ) -> Iterator[FuzzyResult[T]]:
        # Iterative Levenshtein: derive this node's row from its parent's row
        current_row = [previous_row[0] + 1]
        current_char = current_word[-1]
        for column, key_char in enumerate(key, start=1):
            insertion = current_row[column - 1] + 1
            deletion = previous_row[column] + 1
            replacement = previous_row[column - 1] + (key_char != current_char)
            current_row.append(min(insertion, deletion, replacement))
        # If we are under the max distance, match this node
        if (distance := current_row[-1]) <= max_distance and node._value is not None:
            # Only if it has a value of course
            yield FuzzyResult(distance, current_word, node._value)
        # If we can potentially still match children, recurse
        if min(current_row) <= max_distance:
            for c, child in node._children.items():
                yield from helper(current_word + c, child, current_row)

    # Build the first row -- the edit distance from the empty string
    row = list(range(len(key) + 1))
    # Base case for the empty string
    if (distance := row[-1]) <= max_distance and self._value is not None:
        yield FuzzyResult(distance, "", self._value)
    for c, child in self._children.items():
        yield from helper(c, child, row)
```

View file

@ -1,191 +0,0 @@
---
title: "Gap Buffer"
date: 2024-07-06T21:27:19+01:00
draft: false # I don't care for draft mode, git has branches for that
description: "As featured in GNU Emacs"
tags:
- algorithms
- data structures
- python
categories:
- programming
series:
- Cool algorithms
favorite: false
disable_feed: false
---
The [_Gap Buffer_][wiki] is a popular data structure for text editors to
represent files and editable buffers. The most famous of them probably being
[GNU Emacs][emacs].
[wiki]: https://en.wikipedia.org/wiki/Gap_buffer
[emacs]: https://www.gnu.org/software/emacs/manual/html_node/elisp/Buffer-Gap.html
<!--more-->
## What does it do?
A _Gap Buffer_ is simply a list of characters, similar to a normal string, with
the added twist of splitting it into two sides: the prefix and suffix, on either
side of the cursor. In between them, a gap is left to allow for quick
insertion at the cursor.
Moving the cursor moves the gap around the buffer, the prefix and suffix getting
shorter/longer as required.
## Implementation
I'll be writing a sample implementation in Python, as with the rest of the
[series]({{< ref "/series/cool-algorithms/" >}}). I don't think it showcases the
elegance of the _Gap Buffer_ in action like a C implementation full of
`memmove`s would, but it does make it short and sweet.
### Representation
We'll be representing the gap buffer as an actual list of characters.
Given that Python doesn't _have_ characters, let's settle for a list of strings,
each representing a single character...
```python
Char = str


class GapBuffer:
    """Text buffer stored as prefix + gap + suffix inside a single list."""

    # Backing storage: string prefix, then the gap, then the string suffix
    _buf: list[Char]
    # The gap occupies the half-open range [start, end) (i.e: buf[start:end])
    _gap_start: int
    _gap_end: int

    # Visual representation of the gap buffer:
    #   This is a very [                     ]long string.
    #   |<---------------------------------------------->| capacity
    #   |<------------>|                      |<-------->| string
    #                  |<------------------->|             gap
    #   |<------------>|                                   prefix
    #                                         |<-------->| suffix

    def __init__(self, initial_capacity: int = 16) -> None:
        assert initial_capacity > 0
        # No text yet: every slot of the buffer belongs to the gap
        self._buf = ["" for _ in range(initial_capacity)]
        self._gap_start, self._gap_end = 0, initial_capacity
```
### Accessors
I'm mostly adding these for exposition, and making it easier to write `assert`s
later.
```python
@property
def capacity(self) -> int:
    """Total number of slots in the backing list (string and gap together)."""
    return len(self._buf)


@property
def gap_length(self) -> int:
    """Number of slots currently covered by the gap."""
    return self._gap_end - self._gap_start


@property
def string_length(self) -> int:
    """Number of slots that are not part of the gap."""
    total, gap = self.capacity, self.gap_length
    return total - gap


@property
def prefix_length(self) -> int:
    """Number of slots before the gap."""
    return self._gap_start


@property
def suffix_length(self) -> int:
    """Number of slots after the gap."""
    total = self.capacity
    return total - self._gap_end
```
### Growing the buffer
I've written this method in a somewhat non-idiomatic manner, to make it closer
to how it would look in C using `realloc` instead.
It would be more efficient to use slicing to insert the needed extra capacity
directly, instead of making a new buffer and copying characters over.
```python
def grow(self, capacity: int) -> None:
    """Reallocate the buffer to `capacity` slots, widening the gap.

    The prefix keeps its position; the suffix is moved to the end of the new
    buffer, so all of the added capacity ends up inside the gap.
    """
    assert capacity >= self.capacity
    old_buf = self._buf
    old_len = len(old_buf)
    shift = capacity - old_len
    # "realloc": allocate the bigger buffer, then copy both halves over
    new_buf = [""] * capacity
    for i in range(self._gap_start):
        new_buf[i] = old_buf[i]
    for i in range(self._gap_end, old_len):
        new_buf[i + shift] = old_buf[i]
    # Swap in the new buffer; the gap's end moved right by the added capacity
    self._buf = new_buf
    self._gap_end += shift
```
### Insertion
Inserting text at the cursor's position means filling up the gap in the middle
of the buffer. To do so we must first make sure that the gap is big enough, or
grow the buffer accordingly.
Then inserting the text is simply a matter of copying its characters in place,
and moving the start of the gap further right.
```python
def insert(self, val: str) -> None:
    """Write `val` at the cursor and move the start of the gap past it."""
    needed = len(val)
    # Make room first: at least double, or exactly enough for the new text
    if self.gap_length < needed:
        self.grow(max(self.capacity * 2, self.string_length + needed))
    # Copy the characters into the beginning of the gap, one slot each
    cursor = self._gap_start
    for offset, char in enumerate(val):
        self._buf[cursor + offset] = char
    self._gap_start = cursor + needed
```
### Deletion
Removing text from the buffer simply expands the gap in the corresponding
direction, shortening the string's prefix/suffix. This makes it very cheap.
The methods are named after the `backspace` and `delete` keys on the keyboard.
```python
def backspace(self, dist: int = 1) -> None:
    """Delete `dist` characters before the cursor by widening the gap leftwards.

    `dist` must be between 0 and the prefix length; a negative value would
    shrink the gap and expose stale slots as text, so it is rejected.
    """
    assert 0 <= dist <= self.prefix_length
    # Extend gap to the left
    self._gap_start -= dist
def delete(self, dist: int = 1) -> None:
    """Delete `dist` characters after the cursor by widening the gap rightwards.

    `dist` must be between 0 and the suffix length; a negative value would
    shrink the gap and expose stale slots as text, so it is rejected.
    """
    assert 0 <= dist <= self.suffix_length
    # Extend gap to the right
    self._gap_end += dist
```
### Moving the cursor
Moving the cursor along the buffer will shift letters from one side of the gap
to the other, moving them across from prefix to suffix and back.
I find Python's list slicing not quite as elegant to read as a `memmove`, though
it does make for a very small and efficient implementation.
```python
def left(self, dist: int = 1) -> None:
    """Move the cursor `dist` characters to the left.

    The last `dist` characters of the prefix are copied to the slots just
    before the suffix, then both gap indices shift left. `dist` must be
    between 0 and the prefix length: a negative value would move the indices
    without copying anything.
    """
    assert 0 <= dist <= self.prefix_length
    # Shift the needed number of characters from end of prefix to start of suffix
    self._buf[self._gap_end - dist : self._gap_end] = self._buf[
        self._gap_start - dist : self._gap_start
    ]
    # Adjust indices accordingly
    self._gap_start -= dist
    self._gap_end -= dist
def right(self, dist: int = 1) -> None:
    """Move the cursor `dist` characters to the right.

    The first `dist` characters of the suffix are copied to the slots just
    after the prefix, then both gap indices shift right. `dist` must be
    between 0 and the suffix length: a negative value would move the indices
    without copying anything.
    """
    assert 0 <= dist <= self.suffix_length
    # Shift the needed number of characters from start of suffix to end of prefix
    self._buf[self._gap_start : self._gap_start + dist] = self._buf[
        self._gap_end : self._gap_end + dist
    ]
    # Adjust indices accordingly
    self._gap_start += dist
    self._gap_end += dist
```

View file

@ -1,97 +0,0 @@
---
title: "Bloom Filter"
date: 2024-07-14T17:46:40+01:00
draft: false # I don't care for draft mode, git has branches for that
description: "Probably cool"
tags:
- algorithms
- data structures
- python
categories:
- programming
series:
- Cool algorithms
favorite: false
disable_feed: false
---
The [_Bloom Filter_][wiki] is a probabilistic data structure for set membership.
The filter can be used as an inexpensive first step when querying the actual
data is quite costly (e.g: as a first check for expensive cache lookups or large
data seeks).
[wiki]: https://en.wikipedia.org/wiki/Bloom_filter
<!--more-->
## What does it do?
A _Bloom Filter_ can be understood as a hash-set which can either tell you:
* An element is _not_ part of the set.
* An element _may be_ part of the set.
More specifically, one can tweak the parameters of the filter to make it so that
the _false positive_ rate of membership is quite low.
I won't be going into those calculations here, but they are quite trivial to
compute, or one can just look up appropriate values for their use case.
## Implementation
I'll be using Python, which has the nifty ability of representing bitsets
through its built-in big integers quite easily.
We'll be assuming a `BIT_COUNT` of 64 here, but the implementation can easily be
tweaked to use a different number, or even change it at construction time.
### Representation
A `BloomFilter` is just a set of bits and a list of hash functions.
```python
BIT_COUNT = 64
class BloomFilter[T]:
_bits: int
_hash_functions: list[Callable[[T], int]]
def __init__(self, hash_functions: list[Callable[[T], int]]) -> None:
# Filter is initially empty
self._bits = 0
self._hash_functions = hash_functions
```
### Inserting a key
To add an element to the filter, we take the output from each hash function and
use that to set a bit in the filter. This combination of bits will identify the
element, which we can use for lookup later.
```python
def insert(self, val: T) -> None:
    """Record `val` in the filter by setting one bit per hash function."""
    # Iterate over each hash
    for f in self._hash_functions:
        n = f(val) % BIT_COUNT
        # Set the corresponding bit (the attribute is `_bits`, as in `__init__`)
        self._bits |= 1 << n
```
### Querying a key
Because the _Bloom Filter_ does not actually store its elements, but some
derived data from hashing them, it can only definitely say if an element _does
not_ belong to it. Otherwise, it _may_ be part of the set, and should be checked
against the actual underlying store.
```python
def may_contain(self, val: T) -> bool:
    """Return False if `val` was definitely never inserted, True if it may have been."""
    for f in self._hash_functions:
        n = f(val) % BIT_COUNT
        # If one of the bits is unset, the value is definitely not present
        if not (self._bits & (1 << n)):
            return False
    # All bits were matched, `val` is likely to be part of the set
    return True
```

View file

@ -1,159 +0,0 @@
---
title: "Treap"
date: 2024-07-20T14:12:27+01:00
draft: false # I don't care for draft mode, git has branches for that
description: "A simpler BST"
tags:
- algorithms
- data structures
- python
categories:
- programming
series:
- Cool algorithms
favorite: false
disable_feed: false
graphviz: true
---
The [_Treap_][wiki] is a mix between a _Binary Search Tree_ and a _Heap_.
Like a _Binary Search Tree_, it keeps an ordered set of keys in the shape of a
tree, allowing for binary search traversal.
Like a _Heap_, it associates each node with a priority, making sure that a
parent's priority is always higher than any of its children.
[wiki]: https://en.wikipedia.org/wiki/Treap
<!--more-->
## What does it do?
By randomizing the priority value of each key at insertion time, we ensure a
high likelihood that the tree stays _roughly_ balanced, avoiding degenerating to
unbalanced O(N) height.
Here's a sample tree created by inserting integers from 0 to 250 into the tree:
{{< graphviz file="treap.gv" />}}
## Implementation
I'll be keeping the theme for this [series] by using Python to implement the
_Treap_. This leads to somewhat annoying code to handle the rotation process,
which is easier to do in C using pointers.
[series]: {{< ref "/series/cool-algorithms/" >}}
### Representation
Creating a new `Treap` is easy: the tree starts off empty, waiting for new nodes
to insert.
Each `Node` must keep track of the `key`, the mapped `value`, and the node's
`priority` (which is assigned randomly). Finally it must also allow for storing
two children (`left` and `right`).
```python
class Node[K, V]:
key: K
value: V
priority: float
left: Node[K, V] | None
righg: Node[K, V] | None
def __init__(self, key: K, value: V):
# Store key and value, like a normal BST node
self.key = key
self.value = value
# Priority is derived randomly
self.priority = random()
self.left = None
self.right = None
class Treap[K, V]:
_root: Node[K, V] | None
def __init__(self):
# The tree starts out empty
self._root = None
```
### Search
Searching the tree is the same as in any other _Binary Search Tree_.
```python
def get(self, key: K) -> V | None:
    """Return the value mapped to `key`, or `None` if the key is not in the tree."""
    node = self._root
    # The usual BST traversal
    while node is not None:
        if node.key == key:
            return node.value
        elif node.key < key:
            # `key` is greater than this node's: it can only be to the right
            node = node.right
        else:
            node = node.left
    return None
```
### Insertion
To insert a new `key` into the tree, we identify which leaf position it should
be inserted at. We then generate the node's priority, insert it at this
position, and rotate the node upwards until the heap property is respected.
```python
type ChildField = Literal["left, right"]
def insert(self, key: K, value: V) -> bool:
    """Map `key` to `value`.

    Returns True when `key` was already present (its value is overwritten),
    False when a new node had to be created.
    """
    root = self._root
    # Empty treap: the new node simply becomes the root
    if root is None:
        self._root = Node(key, value)
        return False
    # Walk down, remembering each (node, side taken) pair for the rotations
    path = []
    current = root
    while current is not None:
        if current.key == key:
            # Pre-existing key: overwrite in place, the tree shape is unchanged
            current.value = value
            return True
        side = "left" if key < current.key else "right"
        path.append((current, side))
        current = getattr(current, side)
    # We fell off the tree: attach a new leaf where the walk ended
    leaf = Node(key, value)
    last_parent, last_side = path[-1]
    setattr(last_parent, last_side, leaf)
    # Restore the heap property on priorities
    self._rotate_up(leaf, path)
    return False
def _rotate_up(
self,
node: Node[K, V],
parents: list[tuple[Node[K, V], ChildField]],
) -> None:
while parents:
parent, field = parents.pop()
# If the parent has higher priority, we're done rotating
if parent.priority >= node.priority:
break
# Check for grand-parent/root of tree edge-case
if parents:
# Update grand-parent to point to the new rotated node
grand_parent, field = parents[-1]
setattr(grand_parent, field, node)
else:
# Point the root to the new rotated node
self._root = node
other_field = "left" if field == "right" else "right"
# Rotate the node up
setattr(parent, field, getattr(node, other_field))
setattr(node, other_field, parent)
```

File diff suppressed because it is too large Load diff

View file

@ -1,146 +0,0 @@
---
title: "Treap, revisited"
date: 2024-07-27T14:12:27+01:00
draft: false # I don't care for draft mode, git has branches for that
description: "An even simpler BST"
tags:
- algorithms
- data structures
- python
categories:
- programming
series:
- Cool algorithms
favorite: false
disable_feed: false
---
My [last post]({{< relref "../2024-07-20-treap/index.md" >}}) about the _Treap_
showed an implementation using tree rotations, as is commonly done with [AVL
Trees][avl] and [Red Black Trees][rb].
But the _Treap_ lends itself well to a simple and elegant implementation with no
tree rotations. This makes it especially easy to implement the removal of a key,
rather than the fiddly process of deletion using tree rotations.
[avl]: https://en.wikipedia.org/wiki/AVL_tree
[rb]: https://en.wikipedia.org/wiki/Red%E2%80%93black_tree
<!--more-->
## Implementation
All operations on the tree will be implemented in terms of two fundamental
operations: `split` and `merge`.
We'll be reusing the same structures as in the last post, so let's skip straight
to implementing those fundamentals, and building on them for `insert` and
`delete`.
### Split
Splitting a tree means taking a key, and getting the following output:
* a `left` node, root of the tree of all keys lower than the input.
* an extracted `node` which corresponds to the input `key`.
* a `right` node, root of the tree of all keys higher than the input.
```python
type OptionalNode[K, V] = Node[K, V] | None
class SplitResult(NamedTuple):
left: OptionalNode
node: OptionalNode
right: OptionalNode
def split(root: OptionalNode[K, V], key: K) -> SplitResult:
# Base case, empty tree
if root is None:
return SplitResult(None, None, None)
# If we found the key, simply extract left and right
if root.key == key:
left, right = root.left, root.right
root.left, root.right = None, None
return SplitResult(left, root, right)
# Otherwise, recurse on the corresponding side of the tree
if root.key < key:
left, node, right = split(root.right, key)
root.right = left
return SplitResult(root, node, right)
if key < root.key:
left, node, right = split(root.left, key)
root.left = right
return SplitResult(left, node, root)
raise RuntimeError("Unreachable")
```
### Merge
Merging a `left` and `right` tree means (cheaply) building a new tree containing
both of them. A pre-condition for merging is that the `left` tree is composed
entirely of nodes that are lower than any key in `right` (i.e: as in `left` and
`right` after a `split`).
```python
def merge(
left: OptionalNode[K, V],
right: OptionalNode[K, V],
) -> OptionalNode[K, V]:
# Base cases, left or right being empty
if left is None:
return right
if right is None:
return left
# Left has higher priority, it must become the root node
if left.priority >= right.priority:
# We recursively reconstruct its right sub-tree
left.right = merge(left.right, right)
return left
# Right has higher priority, it must become the root node
if left.priority < right.priority:
# We recursively reconstruct its left sub-tree
right.left = merge(left, right.left)
return right
raise RuntimeError("Unreachable")
```
### Insertion
Inserting a node into the tree is done in two steps:
1. `split` the tree to isolate the middle insertion point
2. `merge` it back up to form a full tree with the inserted key
```python
def insert(self, key: K, value: V) -> bool:
    """Map `key` to `value`; return True if the key was already in the tree."""
    # `smaller` and `larger` hold the keys before/after `key`
    smaller, found, larger = split(self._root, key)
    already_present = found is not None
    if already_present:
        # The key was already in the tree: only its value changes
        found.value = value
    else:
        found = Node(key, value)
    # Stitch the three pieces back together with two merges
    self._root = merge(smaller, merge(found, larger))
    # Signal whether the key was already in the tree
    return already_present
```
### Removal
Removing a key from the tree is similar to inserting a new key, and forgetting
to insert it back: simply `split` the tree and `merge` it back without the
extracted middle node.
```python
def remove(self, key: K) -> bool:
    """Remove `key` from the tree; return True if it was present."""
    # `extracted` is the node for `key`, or `None` if the key wasn't in the tree
    smaller, extracted, larger = split(self._root, key)
    # Rebuild the tree from the two outer pieces, leaving the extracted node out
    self._root = merge(smaller, larger)
    return extracted is not None
```

View file

@ -1,145 +0,0 @@
---
title: "Reservoir Sampling"
date: 2024-08-02T18:30:56+01:00
draft: false # I don't care for draft mode, git has branches for that
description: "Elegantly sampling a stream"
tags:
- algorithms
- python
categories:
- programming
series:
- Cool algorithms
favorite: false
disable_feed: false
mathjax: true
---
[_Reservoir Sampling_][reservoir] is an [online][online], probabilistic
algorithm to uniformly sample $k$ random elements out of a stream of values.
It's a particularly elegant and small algorithm, only requiring $\Theta(k)$
amount of space and a single pass through the stream.
[reservoir]: https://en.wikipedia.org/wiki/Reservoir_sampling
[online]: https://en.wikipedia.org/wiki/Online_algorithm
<!--more-->
## Sampling one element
As an introduction, we'll first focus on fairly sampling one element from the
stream.
```python
def sample_one[T](stream: Iterable[T]) -> T:
stream_iter = iter(stream)
# Sample the first element
res = next(stream_iter)
for i, val in enumerate(stream_iter, start=1):
j = random.randint(0, i)
# Replace the sampled element with probability 1/(i + 1)
if j == 0:
res = val
# Return the randomly sampled element
return res
```
### Proof
Let's now prove that this algorithm leads to a fair sampling of the stream.
We'll be doing proof by induction.
#### Hypothesis $H_N$
After iterating through the first $N$ items in the stream,
each of them has had an equal $\frac{1}{N}$ probability of being selected as
`res`.
#### Base Case $H_1$
We can trivially observe that the first element is always assigned to `res`,
$\frac{1}{1} = 1$, the hypothesis has been verified.
#### Inductive Case
For a given $N$, let us assume that $H_N$ holds. Let us now look at the events
of loop iteration where `i = N` (i.e: observation of the $N + 1$-th item in the
stream).
`j = random.randint(0, i)` uniformly selects a value in the range $[0, i]$,
a.k.a $[0, N]$. We then have two cases:
* `j == 0`, with probability $\frac{1}{N + 1}$: we select `val` as the new
reservoir element `res`.
* `j != 0`, with probability $\frac{N}{N + 1}$: we keep the previous value of
`res`. By $H_N$, any of the first $N$ elements had a $\frac{1}{N}$ probability
of being `res` at the start of the loop, so each element now has a
probability $\frac{1}{N} \cdot \frac{N}{N + 1} = \frac{1}{N + 1}$ of being the
selected element.
And thus, we have proven $H_{N + 1}$ at the end of the loop.
## Sampling $k$ elements
The code for sampling $k$ elements is very similar to the one-element case.
```python
def sample[T](stream: Iterable[T], k: int = 1) -> list[T]:
stream_iter = iter(stream)
# Retain the first 'k' elements in the reservoir
res = list(itertools.islice(stream_iter, k))
for i, val in enumerate(stream_iter, start=k):
j = random.randint(0, i)
# Replace one element at random with probability k/(i + 1)
if j < k:
res[j] = val
# Return 'k' randomly sampled elements
return res
```
### Proof
Let us once again do a proof by induction, assuming the stream contains at least
$k$ items.
#### Hypothesis $H_N$
After iterating through the first $N$ items in the stream, each of them has had
an equal $\frac{k}{N}$ probability of being sampled from the stream.
#### Base Case $H_k$
We can trivially observe that the first $k$ elements are sampled at the start of
the algorithm, $\frac{k}{k} = 1$, the hypothesis has been verified.
#### Inductive Case
For a given $N$, let us assume that $H_N$ holds. Let us now look at the events
of the loop iteration where `i = N`, in order to prove $H_{N + 1}$.
`j = random.randint(0, i)` uniformly selects a value in the range $[0, i]$,
a.k.a $[0, N]$. We then have two cases:
* `j >= k`, with probability $1 - \frac{k}{N + 1}$: we do not modify the
sampled reservoir at all.
* `j < k`, with probability $\frac{k}{N + 1}$: we sample the new element to
replace the `j`-th element of the reservoir. Therefore for any element
$e \in [0, k[$ we can either have:
* $j = e$: the element _is_ replaced, probability $\frac{1}{k}$.
* $j \neq e$: the element is _not_ replaced, probability $\frac{k - 1}{k}$.
We can now compute the probability that a previously sampled element is kept in
the reservoir:
$1 - \frac{k}{N + 1} + \frac{k}{N + 1} \cdot \frac{k - 1}{k} = \frac{N}{N + 1}$.
By $H_N$, any of the first $N$ elements had a $\frac{k}{N}$ probability
of being in the reservoir at the start of the loop, so each element now has a
probability $\frac{k}{N} \cdot \frac{N}{N + 1} = \frac{k}{N + 1}$ of being in
the reservoir.
We have now proven that all elements have a probability $\frac{k}{N + 1}$ of
being sampled at the end of the loop, therefore $H_{N + 1}$ has been verified.

View file

@ -1,472 +0,0 @@
---
title: "k-d Tree"
date: 2024-08-10T11:50:33+01:00
draft: false # I don't care for draft mode, git has branches for that
description: "Points in spaaaaace!"
tags:
- algorithms
- data structures
- python
categories:
- programming
series:
- Cool algorithms
favorite: false
disable_feed: false
---
The [_k-d Tree_][wiki] is a useful way to map points in space and make them
efficient to query.
I ran into them during my studies in graphics, as they are one of the
possible acceleration structures for [ray-casting] operations.
[wiki]: https://en.wikipedia.org/wiki/K-d_tree
[ray-casting]: https://en.wikipedia.org/wiki/Ray_casting
<!--more-->
## Implementation
As usual, this will be in Python, though its lack of proper discriminated enums
makes it more verbose than would otherwise be necessary.
### Pre-requisites
Let's first define what kind of space our _k-d Tree_ is dealing with. In this
instance $k = 3$ just like in the normal world.
```python
class Point(NamedTuple):
    """A point in 3D space, indexable by position (and therefore by `Axis`)."""

    x: float
    y: float
    z: float
class Axis(IntEnum):
    """The three axes, numbered so that they can be used as an index."""

    X = 0
    Y = 1
    Z = 2

    def next(self) -> Axis:
        """Return the axis used one level further down: X -> Y -> Z -> X."""
        following = (self.value + 1) % len(Axis)
        return Axis(following)
```
### Representation
The tree is represented by `KdTree`, each of its leaf nodes is a `KdLeafNode`
and its inner nodes are `KdSplitNode`s.
For each point in space, the tree can also keep track of an associated value,
similar to a dictionary or other mapping data structure. Hence we will make our
`KdTree` generic to this mapped type `T`.
#### Leaf node
A leaf node contains a number of points that were added to the tree. For each
point, we also track their mapped value, hence the `dict[Point, T]`.
```python
class KdLeafNode[T]:
points: dict[Point, T]
def __init__(self):
self.points = {}
```
#### Split node
An inner node must partition the space into two sub-spaces along a given axis
and mid-point (thus defining a plane). All points that are "to the left" of the
plane will be kept in one child, while all the points "to the right" will be in
the other. Similar to a [_Binary Search Tree_][bst]'s inner nodes.
[bst]: https://en.wikipedia.org/wiki/Binary_search_tree
```python
class KdSplitNode[T]:
axis: Axis
mid: float
children: tuple[KdTreeNode[T], KdTreeNode[T]]
# Convenience function to index into the child which contains `point`
def _index(self, point: Point) -> int:
return 0 if point[self.axis] <= self.mid else 1
```
#### Tree
The tree itself is merely a wrapper around its inner nodes.
One annoying issue about writing this in Python is the lack of proper
discriminated enum types. So we need to create a wrapper type for the nodes
(`KdNode`) to allow for splitting when updating the tree.
```python
class KdNode[T]:
# Wrapper around leaf/inner nodes, the poor man's discriminated enum
inner: KdLeafNode[T] | KdSplitNode[T]
def __init__(self):
self.inner = KdLeafNode()
# Convenience constructor used when splitting a node
@classmethod
def from_items(cls, items: Iterable[tuple[Point, T]]) -> KdNode[T]:
res = cls()
res.inner.points.update(items)
return res
class KdTree[T]:
_root: KdNode[T]
def __init__(self):
# Tree starts out empty
self._root = KdNode()
```
### Inserting a point
To add a point to the tree, we simply recurse from node to node, similar to a
_BST_'s insertion algorithm. Once we've found the correct leaf node to insert
our point into, we simply do so.
If that leaf node goes over the maximum number of points it can store, we must
then split it along an axis, cycling between `X`, `Y`, and `Z` at each level of
the tree (i.e: splitting along the `X` axis on the first level, then `Y` on the
second, then `Z` after that, and then `X`, etc...).
```python
# How many points should be stored in a leaf node before being split
MAX_CAPACITY = 32
def median(values: Iterable[float]) -> float:
    """Return the median of `values`.

    For an even number of values, this is the point halfway between the two
    middle ones. An empty input raises `IndexError`.
    """
    sorted_values = sorted(values)
    mid_point = len(sorted_values) // 2
    if len(sorted_values) % 2 == 1:
        return sorted_values[mid_point]
    # Even length: the two middle values sit at `mid_point - 1` and `mid_point`
    a, b = sorted_values[mid_point - 1], sorted_values[mid_point]
    return a + (b - a) / 2
def partition[T](
pred: Callable[[T], bool],
iterable: Iterable[T]
) -> tuple[list[T], list[T]]:
truths, falses = [], []
for v in iterable:
(truths if pred(v) else falses).append(v)
return truths, falses
def split_leaf[T](node: KdLeafNode[T], axis: Axis) -> KdSplitNode[T]:
# Find the median value for the given axis
mid = median(p[axis] for p in node.points)
# Split into left/right children according to the mid-point and axis
left, right = partition(lambda kv: kv[0][axis] <= mid, node.points.items())
return KdSplitNode(
split_axis,
mid,
(KdNode.from_items(left), KdNode.from_items(right)),
)
class KdTree[T]:
def insert(self, point: Point, val: T) -> bool:
# Forward to the root node, choose `X` as the first split axis
return self._root.insert(point, val, Axis.X)
class KdLeafNode[T]:
def insert(self, point: Point, val: T, split_axis: Axis) -> bool:
# Check whether we're overwriting a previous value
was_mapped = point in self.points
# Store the corresponding value
self.points[point] = val
# Return whether we've performed an overwrite
return was_mapped
class KdSplitNode[T]:
def insert(self, point: Point, val: T, split_axis: Axis) -> bool:
# Find the child which contains the point
child = self.children[self._index(point)]
# Recurse into it, choosing the next split axis
return child.insert(point, val, split_axis.next())
class KdNode[T]:
def insert(self, point: Point, val: T, split_axis: Axis) -> bool:
# Add the point to the wrapped node...
res = self.inner.insert(point, val, split_axis)
# ... And take care of splitting leaf nodes when necessary
if (
isinstance(self.inner, KdLeafNode)
and len(self.inner.points) > MAX_CAPACITY
):
self.inner = split_leaf(self.inner, split_axis)
return res
```
### Searching for a point
Looking for a given point in the tree looks very similar to a _BST_'s search,
each split node dividing the space into two sub-spaces, only one of which
contains the point.
```python
class KdTree[T]:
def lookup(self, point: Point) -> T | None:
# Forward to the root node
return self._root.lookup(point)
class KdNode[T]:
def lookup(self, point: Point) -> T | None:
# Forward to the wrapped node
return self.inner.lookup(point)
class KdLeafNode[T]:
def lookup(self, point: Point) -> T | None:
# Simply check whether we've stored the point in this leaf
return self.points.get(point)
class KdSplitNode[T]:
def lookup(self, point: Point) -> T | None:
# Recurse into the child which contains the point
return self.children[self._index(point)].lookup(point)
```
### Closest points
Now to look at the most interesting operation one can do on a _k-d Tree_:
querying for the objects which are closest to a given point (i.e: the [Nearest
neighbour search][nns]).
This is a more complicated algorithm, which will also need some modifications to
the current _k-d Tree_ implementation in order to track just a bit more information
about the points it contains.
[nns]: https://en.wikipedia.org/wiki/Nearest_neighbor_search
#### A notion of distance
To search for the closest points to a given origin, we first need to define
which [distance](https://en.wikipedia.org/wiki/Distance) we are using in our
space.
For this example, we'll simply be using the usual definition of [(Euclidean)
distance][euclidean-distance].
[euclidean-distance]: https://en.wikipedia.org/wiki/Euclidean_distance
```python
def dist(point: Point, other: Point) -> float:
    """Return the Euclidean distance between `point` and `other`."""
    # Pair up the coordinates of both points (there is no `self` in a function)
    return sqrt(sum((a - b) ** 2 for a, b in zip(point, other)))
```
#### Tracking the tree's boundaries
To make the query efficient, we'll need to track the tree's boundaries: the
bounding box of all points contained therein. This will allow us to stop the
search early once we've found enough points and can be sure that the rest of the
tree is too far away to qualify.
For this, let's define the `AABB` (Axis-Aligned Bounding Box) class.
```python
class Point(NamedTuple):
    # Convenience function to replace the coordinate along a given dimension
    def replace(self, axis: Axis, new_coord: float) -> Point:
        """Return a copy of this point with the `axis` coordinate set to `new_coord`."""
        coords = list(self)
        coords[axis] = new_coord
        # `Point` takes one argument per coordinate, so the list must be unpacked
        return Point(*coords)
class AABB(NamedTuple):
    """Axis-Aligned Bounding Box, described by two opposite corners."""

    # Lowest coordinates in the box
    low: Point
    # Highest coordinates in the box
    high: Point

    # An empty box
    @classmethod
    def empty(cls) -> AABB:
        # Inverted infinite bounds: extending with any point yields that point
        return cls(
            Point(*(float("inf"),) * 3),
            Point(*(float("-inf"),) * 3),
        )

    # Split the box into two along a given axis for a given mid-point
    def split(self, axis: Axis, mid: float) -> tuple[AABB, AABB]:
        assert self.low[axis] <= mid <= self.high[axis]
        return (
            AABB(self.low, self.high.replace(axis, mid)),
            AABB(self.low.replace(axis, mid), self.high),
        )

    # Extend a box to contain a given point
    def extend(self, point: Point) -> AABB:
        # The box is immutable: build and return a new, larger one
        low = Point(*map(min, zip(self.low, point)))
        high = Point(*map(max, zip(self.high, point)))
        return AABB(low, high)

    # Return the shortest distance between a given point and the box
    def dist_to_point(self, point: Point) -> float:
        # Per axis: how far outside of [low, high] the point is (0 when inside)
        deltas = (
            max(self.low[axis] - point[axis], 0, point[axis] - self.high[axis])
            for axis in Axis
        )
        return dist(Point(0, 0, 0), Point(*deltas))
```
And do the necessary modifications to the `KdTree` to store the bounding box and
update it as we add new points.
```python
class KdTree[T]:
_root: KdNode[T]
# New field: to keep track of the tree's boundaries
_aabb: AABB
def __init__(self):
self._root = KdNode()
# Initialize the empty tree with an empty bounding box
self._aabb = AABB.empty()
def insert(self, point: Point, val: T) -> bool:
# Extend the AABB for our k-d Tree when adding a point to it
self._aabb = self._aabb.extend(point)
return self._root.insert(point, val, Axis.X)
```
#### `MaxHeap`
Python's builtin [`heapq`][heapq] module provides the necessary functions to
create and interact with a [_Priority Queue_][priority-queue], in the form of a
[_Binary Heap_][binary-heap].
Unfortunately, Python's library maintains a _min-heap_, which keeps the minimum
element at the root. For this algorithm, we're interested in having a
_max-heap_, with the maximum at the root.
Thankfully, one can just reverse the comparison function for each element to
convert between the two. Let's write a `MaxHeap` class making use of this
library, with a `Reverse` wrapper class to reverse the order of elements
contained within it (similar to [Rust's `Reverse`][reverse]).
[binary-heap]: https://en.wikipedia.org/wiki/Binary_heap
[heapq]: https://docs.python.org/3/library/heapq.html
[priority-queue]: https://en.wikipedia.org/wiki/Priority_queue
[reverse]: https://doc.rust-lang.org/std/cmp/struct.Reverse.html
```python
# Reverses the wrapped value's ordering
@functools.total_ordering
class Reverse[T]:
value: T
def __init__(self, value: T):
self.value = value
def __lt__(self, other: Reverse[T]) -> bool:
return self.value > other.value
def __eq__(self, other: Reverse[T]) -> bool:
return self.value == other.value
class MaxHeap[T]:
_heap: list[Reverse[T]]
def __init__(self):
self._heap = []
def __len__(self) -> int:
return len(self._heap)
def __iter__(self) -> Iterator[T]:
yield from (item.value for item in self._heap)
# Push a value on the heap
def push(self, value: T) -> None:
heapq.heappush(self._heap, Reverse(value))
# Peek at the current maximum value
def peek(self) -> T:
return self._heap[0].value
# Pop and return the highest value
def pop(self) -> T:
return heapq.heappop(self._heap).value
# Pushes a value onto the heap, pops and returns the highest value
def pushpop(self, value: T) -> None:
return heapq.heappushpop(self._heap, Reverse(value)).value
```
#### The actual Implementation
Now that we have written the necessary building blocks, let's tackle the
implementation of `closest` for our _k-d Tree_.
```python
# Wrapper type for closest points, ordered by `distance`
@dataclasses.dataclass(order=True)
class ClosestPoint[T](NamedTuple):
point: Point = field(compare=False)
value: T = field(compare=False)
distance: float
class KdTree[T]:
def closest(self, point: Point, n: int = 1) -> list[ClosestPoint[T]]:
assert n > 0
# Create the output heap
res = MaxHeap()
# Recurse onto the root node
self._root.closest(point, res, n, self._aabb)
# Return the resulting list, from closest to farthest
return sorted(res)
class KdNode[T]:
def closest(
self,
point: Point,
out: MaxHeap[ClosestPoint[T]],
n: int,
bounds: AABB,
) -> None:
# Forward to the wrapped node
self.inner.closest(point, out, n, bounds)
class KdLeafNode[T]:
def closest(
self,
point: Point,
out: MaxHeap[ClosestPoint[T]],
n: int,
bounds: AABB,
) -> None:
# At the leaf, simply iterate over all points and add them to the heap
for p, val in self.points.items():
item = ClosestPoint(p, val, dist(p, point))
if len(out) < n:
# If the heap isn't full, just push
out.push(item)
elif out.peek().distance > item.distance:
# Otherwise, push and pop to keep the heap at `n` elements
out.pushpop(item)
class KdSplitNode[T]:
def closest(
self,
point: Point,
out: list[ClosestPoint[T]],
n: int,
bounds: AABB,
) -> None:
index = self._index(point)
children_bounds = bounds.split(self.axis, self.mid)
# Iterate over the child which contains the point, then its neighbour
for i in (index, 1 - index):
child, bounds = self.children[i], children_bounds[i]
# `min_dist` is 0 for the first child, and the minimum distance of
# all points contained in the second child
min_dist = bounds.dist_to_point(point)
# If the heap is at capacity and the child to inspect too far, stop
if len(out) == n and min_dist > out.peek().distance:
return
# Otherwise, recurse
child.closest(point, out, n, bounds)
```

View file

@ -1,112 +0,0 @@
---
title: "Kd Tree Revisited"
date: 2024-08-17T14:20:22+01:00
draft: false # I don't care for draft mode, git has branches for that
description: "Simplifying the nearest neighbour search"
tags:
- algorithms
- data structures
- python
categories:
- programming
series:
- Cool algorithms
favorite: false
disable_feed: false
---
After giving it a bit of thought, I've found a way to simplify the nearest
neighbour search (i.e. the `closest` method) for the `KdTree` I implemented in
[my previous post]({{< relref "../2024-08-10-kd-tree/index.md" >}}).
<!--more-->
## The improvement
That post implemented the nearest neighbour search by keeping track of the
tree's boundaries (through `AABB`) and those of each of its sub-trees (through
`AABB.split`), and by testing for the early exit condition by computing the
distance from the search's origin to each sub-tree's boundaries.
Instead of _explicitly_ keeping track of each sub-tree's boundaries, we can
implicitly compute it when recursing down the tree.
To do so, we compute the distance between the queried point and the splitting
plane of each inner node: we simply need to project the origin onto that plane,
which gives us a lower bound on the distance to the points stored on the other
side.
This can be easily computed from the `axis` and `mid` values which are stored in
the inner nodes: to project the point onto the plane, we simply replace its
coordinate along this axis with `mid`.
## Simplified search
With that out of the way, let's now see how `closest` can be implemented without
needing to track the tree's `AABB` at the root:
```python
# Wrapper type for closest points, ordered by `distance`
@dataclasses.dataclass(order=True)
class ClosestPoint[T](NamedTuple):
point: Point = field(compare=False)
value: T = field(compare=False)
distance: float
class KdTree[T]:
def closest(self, point: Point, n: int = 1) -> list[ClosestPoint[T]]:
assert n > 0
res = MaxHeap()
# Instead of passing an `AABB`, we give an initial projection point,
# the query origin itself (since we haven't visited any split node yet)
self._root.closest(point, res, n, point)
return sorted(res)
class KdNode[T]:
def closest(
self,
point: Point,
out: MaxHeap[ClosestPoint[T]],
n: int,
projection: Point,
) -> None:
# Same implementation
self.inner.closest(point, out, n, bounds)
class KdLeafNode[T]:
def closest(
self,
point: Point,
out: MaxHeap[ClosestPoint[T]],
n: int,
projection: Point,
) -> None:
# Same implementation
for p, val in self.points.items():
item = ClosestPoint(p, val, dist(p, point))
if len(out) < n:
out.push(item)
elif out.peek().distance > item.distance:
out.pushpop(item)
class KdSplitNode[T]:
def closest(
self,
point: Point,
out: list[ClosestPoint[T]],
n: int,
projection: Point,
) -> None:
index = self._index(point)
self.children[index].closest(point, out, n, projection)
# Project onto the splitting plane, for a minimum distance to its points
projection = projection.replace(self.axis, self.mid)
# If we're at capacity and can't possibly find any closer points, exit
if len(out) == n and dist(point, projection) > out.peek().distance:
return
# Otherwise recurse on the other side to check for nearer neighbours
self.children[1 - index].closest(point, out, n, projection)
```
As you can see, the main difference is in `KdSplitNode`'s implementation, where
we can quickly compute the minimum distance between the search's origin and all
potential points in that subspace.

View file

@ -16,7 +16,7 @@ favorite: false
The flyweight is a well-known
[GoF](https://en.wikipedia.org/wiki/Design_Patterns) design pattern.
Its intent is to minimize memory usage by reducing the number of instantiations
It's intent is to minimize memory usage by reducing the number of instantiations
of a given object.
I will show you how to implement a robust flyweight in C++, as well as a way to

View file

@ -2,12 +2,14 @@
title: "Git Basics"
date: 2020-12-07 18:54:31+0100
draft: false # I don't care for draft mode, git has branches for that
description: "The next step after the basics"
description: ""
tags:
- git
- cli
categories:
- programming
series:
- Git basics
favorite: false
---
@ -134,7 +136,7 @@ branch.
#### Fixup, a practical example
A specific kind of squashing which I use frequently is the notion of `fixup`s.
Say you've committed a change (*A*), and later on notice that it is missing
Say you've commited a change (*A*), and later on notice that it is missing
a part of the changeset. You can decide to commit that missing part (*A-bis*)
and annotate it to mean that it is linked to *A*.
@ -184,7 +186,7 @@ After applying the rebase, you find yourself with the complete change inside
This is especially useful when you want to apply suggestion on a merge request
after it was reviewed. You can keep a clean history without those pesky `Apply
suggestion ...` commits being part of your history.
suggestion ...` commmits being part of your history.
### Lost commits and the reflog
@ -315,7 +317,7 @@ easily choose which parts of your changes should end up in the same commit.
Here's a list of commands that you should read-up on, but I won't be presenting
further:
* `git bisect`
* `git bissect`
* `git rerere`
* `git stash`
* and more...

View file

@ -8,8 +8,6 @@ tags:
categories:
favorite: false
tikz: true
graphviz: true
mermaid: true
---
## Test post please ignore
@ -42,29 +40,6 @@ echo hello world | cut -d' ' -f 1
\end{tikzpicture}
{{% /tikz %}}
### Graphviz support
{{% graphviz %}}
graph {
a -- b
b -- c
c -- a
}
{{% /graphviz %}}
### Mermaid support
{{% mermaid %}}
graph TD
A[Enter Chart Definition] --> B(Preview)
B --> C{decide}
C --> D[Keep]
C --> E[Edit Definition]
E --> B
D --> F[Save Image and Code]
F --> B
{{% /mermaid %}}
### Spoilers
{{% spoiler "Don't open me" %}}

View file

@ -14,8 +14,8 @@ favorite: false
---
Coming back from our last post about [generic flyweights in C++]({{< relref
"../2020-07-16-generic-flyweight-cpp/index.md" >}}), we can write a flyweight
that can be used with any abstract base classes.
"generic-flyweight-cpp.md" >}}), we can write a flyweight that can be used with
any abstract base classes.
<!--more-->
@ -68,7 +68,7 @@ public:
const std::type_index lhs_i(lhs);
const std::type_index rhs_i(rhs);
if (lhs_i != rhs_i)
return lhs_i < rhs_i;
returh lhs_i < rhs_i;
// We are now assured that both classes have the same type
return less_than(rhs);
}

View file

@ -8,19 +8,7 @@ disable_feed: true
A few of my Amazon wish lists in case you want to give me a gift.
* [Wish list](https://www.amazon.fr/hz/wishlist/ls/1FT0IO9JJTX57)
* ~~[Board games](https://www.amazon.fr/hz/wishlist/ls/2NY50W36THGMW)~~
* See the [Board Game Geek list](https://boardgamegeek.com/wishlist/Ambroisie)
which is better curated and more up-to-date
* [Board games](https://www.amazon.fr/hz/wishlist/ls/2NY50W36THGMW)
* [Cooking & kitchenware](https://www.amazon.fr/hz/wishlist/ls/2MNRCLPNABZSU)
* [Data Structures & Algorithms](https://www.amazon.fr/hz/wishlist/ls/2XZPQSBOGOFC3)
* [Programming](https://www.amazon.fr/hz/wishlist/ls/1R4KFV4H2D8IF)
* Other items:
* [Chef's presses](https://www.thechefspress.com/shop)
* [UK link](https://www.kitchenprovisions.co.uk/products/the-chefs-press-8oz-13oz)
* [FR link](https://www.thekitchenlab.fr/fr/p/la-presse-du-chef-poids-de-friture-bruce-hill-13-oz/)
* [Combustion Inc thermometer and
display](https://combustion.inc/products/predictive-thermometer-gen2-wifi-display)
* [Get the one with the range extender if you *really* want to spoil
me](https://combustion.inc/products/predictive-thermometer-gen2-wifi-booster-wifi-display)
* [Cannelés
molds](https://www.laboetgato.fr/en/moules-a-canneles/13964-mould-for-canneles-non-polished-copper-o-45-mm-3333331010026.html)

View file

@ -1,68 +1,28 @@
{
"nodes": {
"flake-compat": {
"flake": false,
"locked": {
"lastModified": 1696426674,
"narHash": "sha256-kvjfFW7WAETZlt09AgDn1MrtKzP7t90Vf7vypd3OL1U=",
"owner": "edolstra",
"repo": "flake-compat",
"rev": "0f9255e01c2351cc7d116c072cb317785dd33b33",
"type": "github"
},
"original": {
"owner": "edolstra",
"repo": "flake-compat",
"type": "github"
}
},
"futils": {
"inputs": {
"systems": "systems"
},
"locked": {
"lastModified": 1710146030,
"narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=",
"lastModified": 1622445595,
"narHash": "sha256-m+JRe6Wc5OZ/mKw2bB3+Tl0ZbtyxxxfnAWln8Q5qs+Y=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a",
"rev": "7d706970d94bc5559077eb1a6600afddcd25a7c8",
"type": "github"
},
"original": {
"owner": "numtide",
"ref": "main",
"ref": "master",
"repo": "flake-utils",
"type": "github"
}
},
"gitignore": {
"inputs": {
"nixpkgs": [
"pre-commit-hooks",
"nixpkgs"
]
},
"locked": {
"lastModified": 1709087332,
"narHash": "sha256-HG2cCnktfHsKV0s4XW83gU3F57gaTljL9KNSuG6bnQs=",
"owner": "hercules-ci",
"repo": "gitignore.nix",
"rev": "637db329424fd7e46cf4185293b9cc8c88c95394",
"type": "github"
},
"original": {
"owner": "hercules-ci",
"repo": "gitignore.nix",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1722415718,
"narHash": "sha256-5US0/pgxbMksF92k1+eOa8arJTJiPvsdZj9Dl+vJkM4=",
"lastModified": 1628320020,
"narHash": "sha256-4xBEb+TOHyIGpK37EVsZx6dGPwNMf5YWNBJaQ4VyZws=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "c3392ad349a5227f4a3464dce87bcc5046692fce",
"rev": "67c80531be622641b5b2ccc3a7aff355cb02476b",
"type": "github"
},
"original": {
@ -74,21 +34,19 @@
},
"pre-commit-hooks": {
"inputs": {
"flake-compat": "flake-compat",
"gitignore": "gitignore",
"nixpkgs": [
"nixpkgs"
"flake-utils": [
"futils"
],
"nixpkgs-stable": [
"nixpkgs": [
"nixpkgs"
]
},
"locked": {
"lastModified": 1721042469,
"narHash": "sha256-6FPUl7HVtvRHCCBQne7Ylp4p+dpP3P/OYuzjztZ4s70=",
"lastModified": 1621411868,
"narHash": "sha256-R+7OQ2JYFCb3E7Jl7LhRifzMVCR6Gl8R98zYsNhZtJ8=",
"owner": "cachix",
"repo": "pre-commit-hooks.nix",
"rev": "f451c19376071a90d8c58ab1a953c6e9840527fd",
"rev": "2e7fac06108b4fc81f5ff9ed9a02bc4f6ede7001",
"type": "github"
},
"original": {
@ -104,21 +62,6 @@
"nixpkgs": "nixpkgs",
"pre-commit-hooks": "pre-commit-hooks"
}
},
"systems": {
"locked": {
"lastModified": 1681028828,
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
"owner": "nix-systems",
"repo": "default",
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
"type": "github"
},
"original": {
"owner": "nix-systems",
"repo": "default",
"type": "github"
}
}
},
"root": "root",

View file

@ -6,7 +6,7 @@
type = "github";
owner = "numtide";
repo = "flake-utils";
ref = "main";
ref = "master";
};
nixpkgs = {
@ -22,8 +22,8 @@
repo = "pre-commit-hooks.nix";
ref = "master";
inputs = {
flake-utils.follows = "futils";
nixpkgs.follows = "nixpkgs";
nixpkgs-stable.follows = "nixpkgs";
};
};
};
@ -61,17 +61,15 @@
};
};
devShells = {
default = pkgs.mkShell {
name = "blog";
devShell = pkgs.mkShell {
name = "blog";
buildInputs = with pkgs; [
gnumake
hugo
];
buildInputs = with pkgs; [
gnumake
hugo
];
inherit (self.checks.${system}.pre-commit) shellHook;
};
inherit (self.checks.${system}.pre-commit) shellHook;
};
}
);

View file

@ -1,5 +1,5 @@
series:
other: "series"
serie:
other: "serie"
Series:
other: "Series"

View file

@ -1,4 +1,4 @@
series:
serie:
other: "série"
Series:

View file

@ -11,7 +11,6 @@
<a data-hint="Sourcehut" title="Sourcehut" href="https://sr.ht/~{{ .Site.Author.sourcehut }}" target="_blank" rel="me"> Sourcehut </a>
<a data-hint="LinkedIn" title="LinkedIn" href="https://www.linkedin.com/in/{{ .Site.Author.linkedin }}" target="_blank" rel="me"> LinkedIn </a>
<a data-hint="Matrix" title="Matrix" href="https://matrix.to/#/{{ .Site.Author.matrix }}" target="_blank" rel="me"> Matrix </a>
<a data-hint="Mastodon" title="Mastodon" href="https://{{ .Site.Author.mastodon }}" target="_blank" rel="me"> Mastodon </a>
<a rel="pgpkey" href="https://key.belanyi.fr/key.pgp"> PGP </a>
<link rel="authorization_endpoint" href="https://indieauth.com/auth">
<p>

View file

@ -3,30 +3,6 @@
<link rel="stylesheet" type="text/css" href="https://tikzjax.com/v1/fonts.css">
<script async src="https://tikzjax.com/v1/tikzjax.js"></script>
{{ end }}
<!-- Graphviz support -->
{{ if (.Params.graphviz) }}
<script src="https://cdn.jsdelivr.net/npm/@viz-js/viz@3.7.0/lib/viz-standalone.min.js"></script>
<script type="text/javascript">
(function() {
Viz.instance().then(function(viz) {
Array.prototype.forEach.call(document.querySelectorAll("pre.graphviz"), function(x) {
var svg = viz.renderSVGElement(x.innerText);
// Let CSS take care of the SVG size
svg.removeAttribute("width")
svg.setAttribute("height", "auto")
x.replaceChildren(svg)
})
})
})();
</script>
{{ end }}
<!-- Mermaid support -->
{{ if (.Params.mermaid) }}
<script type="module" async>
import mermaid from "https://cdn.jsdelivr.net/npm/mermaid@latest/dist/mermaid.esm.min.mjs";
mermaid.initialize({ startOnLoad: true });
</script>
{{ end }}
{{ with .OutputFormats.Get "atom" -}}
{{ printf `<link rel="%s" type="%s" href="%s" title="%s" />` .Rel .MediaType.Type .Permalink $.Site.Title | safeHTML }}
{{ end -}}

View file

@ -1,16 +0,0 @@
<pre class="graphviz">
{{ with .Get "file" }}
{{ if eq (. | printf "%.1s") "/" }}
{{/* Absolute paths are from the root of the site. */}}
{{ $.Scratch.Set "filepath" . }}
{{ else }}
{{/* Relative paths are from page directory. */}}
{{ $.Scratch.Set "filepath" $.Page.File.Dir }}
{{ $.Scratch.Add "filepath" . }}
{{ end }}
{{ $.Scratch.Get "filepath" | readFile }}
{{ else }}
{{.Inner}}
{{ end }}
</pre>

View file

@ -1,16 +0,0 @@
<pre class="mermaid">
{{ with .Get "file" }}
{{ if eq (. | printf "%.1s") "/" }}
{{/* Absolute paths are from the root of the site. */}}
{{ $.Scratch.Set "filepath" . }}
{{ else }}
{{/* Relative paths are from page directory. */}}
{{ $.Scratch.Set "filepath" $.Page.File.Dir }}
{{ $.Scratch.Add "filepath" . }}
{{ end }}
{{ $.Scratch.Get "filepath" | readFile }}
{{ else }}
{{.Inner}}
{{ end }}
</pre>

View file

@ -1,16 +1,3 @@
<script type="text/tikz">
{{ with .Get "file" }}
{{ if eq (. | printf "%.1s") "/" }}
{{/* Absolute paths are from the root of the site. */}}
{{ $.Scratch.Set "filepath" . }}
{{ else }}
{{/* Relative paths are from page directory. */}}
{{ $.Scratch.Set "filepath" $.Page.File.Dir }}
{{ $.Scratch.Add "filepath" . }}
{{ end }}
{{ $.Scratch.Get "filepath" | readFile }}
{{ else }}
{{.Inner}}
{{ end }}
{{.Inner}}
</script>

@ -1 +1 @@
Subproject commit d77e0d6d552ad135138f1f9443ef30cc5af8f0f3
Subproject commit 5dab60e04a37896c09a32137aefe821c63b3af04

@ -1 +1 @@
Subproject commit d545effed9949bf834eaed09ad423ec3e030794f
Subproject commit 5da913dc46d2dadcaf6548256238c58c504476de