diff --git a/.drone.jsonnet b/.drone.jsonnet new file mode 100644 index 0000000..a5059e2 --- /dev/null +++ b/.drone.jsonnet @@ -0,0 +1,65 @@ +local Pipeline(isDev) = { + kind: "pipeline", + type: "exec", + name: if isDev then "Deploy to dev" else "Deploy to prod", + # Dev ignores "main", prod only triggers on "main" + trigger: { branch: { [if isDev then "exclude" else "include"]: [ "main" ] } }, + steps: [ + { + # We want to clone the submodules, which isn't done by default + name: "submodules", + commands: [ + "git submodule update --recursive --init", + ] + }, + { + # Include pre-commit checks, which include markdownlint + name: "check", + commands: [ + "nix flake check", + ], + }, + { + # If dev, include drafts and future articles, change base URL + name: "build", + commands: [ + "nix develop -c make " + if isDev then "build-dev" else "build-prod", + ], + }, + { + name: "deploy", + commands: [ + "nix run github:ambroisie/nix-config#drone-scp", + ], + environment: { + SCP_SOURCE: "public/*", + TAR_STRIP_COMPONENTS: 1, # Remove 'public/' prefix from file paths + SCP_RM: true, # Remove previous files from target directory + SCP_HOST: { from_secret: "ssh_host" }, + SCP_TARGET: { from_secret: "ssh_target" + if isDev then "_dev" else "" }, + SCP_USERNAME: { from_secret: "ssh_user" }, + SCP_KEY: { from_secret: "ssh_key" }, + SCP_PORT: { from_secret: "ssh_port" }, + }, + }, + { + name: "notify", + commands: [ + "nix run github:ambroisie/matrix-notifier", + ], + environment: { + ADDRESS: { from_secret: "matrix_homeserver" }, + ROOM: { from_secret: "matrix_roomid" }, + USER: { from_secret: "matrix_username" }, + PASS: { from_secret: "matrix_password" }, + }, + when: { status: [ "failure", "success", ] }, + }, + ] +}; + + +[ + Pipeline(false), + Pipeline(true), +] diff --git a/.envrc b/.envrc index 3550a30..116d0c6 100644 --- a/.envrc +++ b/.envrc @@ -1 +1,8 @@ +use_flake() { + watch_file flake.nix + watch_file flake.lock + eval "$(nix print-dev-env)" +} + use flake
+eval "$shellHooks" diff --git a/.markdownlint.yaml b/.markdownlint.yaml deleted file mode 100644 index 419c334..0000000 --- a/.markdownlint.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# MD024/no-duplicate-heading/no-duplicate-header -MD024: - siblings_only: true diff --git a/.woodpecker/deploy.yml b/.woodpecker/deploy.yml deleted file mode 100644 index a6a8b0f..0000000 --- a/.woodpecker/deploy.yml +++ /dev/null @@ -1,64 +0,0 @@ -labels: - backend: local - -matrix: - include: - - TYPE: dev - MAKE_TARGET: build-dev - SSH_TARGET: ssh_target_dev - - TYPE: prod - MAKE_TARGET: build-prod - SSH_TARGET: ssh_target - -# Run the correct matrix build on the correct branch -when: - evaluate: | - ((CI_COMMIT_BRANCH == CI_REPO_DEFAULT_BRANCH) == ("${TYPE}" == "prod")) - -steps: -- name: check - image: bash - commands: - - nix flake check - -- name: build (${TYPE}) - image: bash - commands: - # If dev, include drafts and future articles, change base URL - - nix develop -c make ${MAKE_TARGET} - -- name: deploy (${TYPE}) - image: bash - environment: - # Trailing slash to synchronize the folder's *content* to the target - SYNC_SOURCE: public/ - SYNC_KEY: - from_secret: ssh_key - SYNC_PORT: - from_secret: ssh_port - SYNC_TARGET: - from_secret: ${SSH_TARGET} - SYNC_USERNAME: - from_secret: ssh_user - SYNC_HOST: - from_secret: ssh_host - commands: - - "nix run github:ambroisie/nix-config#drone-rsync" - -- name: notify - image: bash - environment: - ADDRESS: - from_secret: matrix_homeserver - ROOM: - from_secret: matrix_roomid - USER: - from_secret: matrix_username - PASS: - from_secret: matrix_password - commands: - - nix run github:ambroisie/matrix-notifier - when: - status: - - failure - - success diff --git a/archetypes/default.md b/archetypes/default.md index 12912b7..3529484 100644 --- a/archetypes/default.md +++ b/archetypes/default.md @@ -5,18 +5,15 @@ draft: false # I don't care for draft mode, git has branches for that description: "" tags: - accounting - - algorithms - c++ - ci/cd - 
cli - - data structures - design-pattern - docker - drone - git - hugo - nix - - python - self-hosting - test categories: diff --git a/config.yaml b/config.yaml index c13a739..0efb236 100644 --- a/config.yaml +++ b/config.yaml @@ -6,6 +6,8 @@ theme: - "hugo-atom-feed" - "anubis" paginate: 5 +disqusShortname: "" +googleAnalytics: "" enableRobotsTXT: true enableEmoji: true @@ -35,12 +37,11 @@ menu: author: name: "Bruno BELANYI" email: "contact-blog@belanyi.fr" - github: "ambroisie" - gitlab: "ambroisie" + github: "Ambroisie" + gitlab: "Ambroisie" sourcehut: "ambroisie" linkedin: "bruno-belanyi" matrix: "@ambroisie:belanyi.fr" - mastodon: "nixos.paris/@ambroisie" permalinks: posts: /:year/:month/:day/:title/ @@ -65,18 +66,11 @@ params: webmentions: login: belanyi.fr pingback: true - mathjax: true - -services: - disqus: - shortname: "" - googleAnalytics: - ID: "" taxonomies: category: "categories" tag: "tags" - series: "series" + serie: "series" markup: goldmark: diff --git a/content/about.md b/content/about.md index 3303a55..12b578b 100644 --- a/content/about.md +++ b/content/about.md @@ -4,10 +4,7 @@ description: "About me" date: 2020-07-14 --- -I'm currently working as a Software Engineer at [Google][google], as part -of their Embedded Graphics Drivers team for Pixel devices. - -[google]: https://www.linkedin.com/company/google/ +I'm a CS student at EPITA. You can find my CV [here](https://cv.belanyi.fr/en.pdf). Or [here](https://cv.belanyi.fr/fr.pdf) for the french version. 
diff --git a/content/posts/2020-07-14-hello-world/index.md b/content/posts/2020-07-14-hello-world/index.md index d430add..805eb07 100644 --- a/content/posts/2020-07-14-hello-world/index.md +++ b/content/posts/2020-07-14-hello-world/index.md @@ -8,8 +8,6 @@ tags: categories: favorite: false tikz: true -graphviz: true -mermaid: true --- ## Test post please ignore @@ -42,29 +40,6 @@ echo hello world | cut -d' ' -f 1 \end{tikzpicture} {{% /tikz %}} -### Graphviz support - -{{% graphviz %}} - graph { - a -- b - b -- c - c -- a - } -{{% /graphviz %}} - -### Mermaid support - -{{% mermaid %}} - graph TD - A[Enter Chart Definition] --> B(Preview) - B --> C{decide} - C --> D[Keep] - C --> E[Edit Definition] - E --> B - D --> F[Save Image and Code] - F --> B -{{% /graphviz %}} - ### Spoilers {{% spoiler "Don't open me" %}} diff --git a/content/posts/2020-07-16-generic-flyweight-cpp/index.md b/content/posts/2020-07-16-generic-flyweight-cpp/index.md index 3ca1e3a..303b3db 100644 --- a/content/posts/2020-07-16-generic-flyweight-cpp/index.md +++ b/content/posts/2020-07-16-generic-flyweight-cpp/index.md @@ -16,7 +16,7 @@ favorite: false The flyweight is a well-known [GoF](https://en.wikipedia.org/wiki/Design_Patterns) design pattern. -Its intent is to minimize memory usage by reducing the number of instantiations +Its intent is to minimize memory usage by reducing the number of instantiations of a given object.
I will show you how to implement a robust flyweight in C++, as well as a way to diff --git a/content/posts/2020-07-22-polymorphic-flyweight-cpp/index.md b/content/posts/2020-07-22-polymorphic-flyweight-cpp/index.md index 2311002..5ff4b1f 100644 --- a/content/posts/2020-07-22-polymorphic-flyweight-cpp/index.md +++ b/content/posts/2020-07-22-polymorphic-flyweight-cpp/index.md @@ -68,7 +68,7 @@ public: const std::type_index lhs_i(lhs); const std::type_index rhs_i(rhs); if (lhs_i != rhs_i) - return lhs_i < rhs_i; + return lhs_i < rhs_i; // We are now assured that both classes have the same type return less_than(rhs); } diff --git a/content/posts/2020-12-07-git-basics/index.md b/content/posts/2020-12-07-git-basics/index.md index 998eaf6..daa3682 100644 --- a/content/posts/2020-12-07-git-basics/index.md +++ b/content/posts/2020-12-07-git-basics/index.md @@ -8,6 +8,8 @@ tags: - cli categories: - programming +series: + - Git basics favorite: false --- @@ -134,7 +136,7 @@ branch. #### Fixup, a practical example A specific kind of squashing which I use frequently is the notion of `fixup`s. -Say you've committed a change (*A*), and later on notice that it is missing +Say you've committed a change (*A*), and later on notice that it is missing a part of the changeset. You can decide to commit that missing part (*A-bis*) and annotate it to mean that it is linked to *A*. @@ -184,7 +186,7 @@ After applying the rebase, you find yourself with the complete change inside This is especially useful when you want to apply suggestion on a merge request after it was reviewed. You can keep a clean history without those pesky `Apply -suggestion ...` commits being part of your history. +suggestion ...` commits being part of your history. ### Lost commits and the reflog @@ -315,7 +317,7 @@ easily choose which parts of your changes should end up in the same commit.
Here's a list of commands that you should read-up on, but I won't be presenting further: -* `git bisect` +* `git bisect` * `git rerere` * `git stash` * and more... diff --git a/content/posts/2022-11-02-multiple-dispatch-in-c++/index.md b/content/posts/2022-11-02-multiple-dispatch-in-c++/index.md deleted file mode 100644 index 0e59968..0000000 --- a/content/posts/2022-11-02-multiple-dispatch-in-c++/index.md +++ /dev/null @@ -1,329 +0,0 @@ ---- -title: "Multiple Dispatch in C++" -date: 2022-11-02T16:36:53+01:00 -draft: false # I don't care for draft mode, git has branches for that -description: "A Lisp super-power in C++" -tags: - - c++ - - design-pattern -categories: - - programming -series: -favorite: false -disable_feed: false ---- - -A great feature that can be used in more dynamic languages is *multiple -dispatch*. Here's an example in [Julia][julia-lang] taken from the [Wikipedia -article][wiki-multiple-dispatch]. - -```julia -abstract type SpaceObject end - -struct Asteroid <: SpaceObject - # Asteroid fields -end -struct Spaceship <: SpaceObject - # Spaceship fields -end - -collide_with(::Asteroid, ::Spaceship) = # Asteroid/Spaceship collision -collide_with(::Spaceship, ::Asteroid) = # Spaceship/Asteroid collision -collide_with(::Spaceship, ::Spaceship) = # Spaceship/Spaceship collision -collide_with(::Asteroid, ::Asteroid) = # Asteroid/Asteroid collision - -collide(x::SpaceObject, y::SpaceObject) = collide_with(x, y) -``` - -The `collide` function calls `collide_with` which, at runtime, will inspect the -types of its arguments and *dispatch* to the appropriate implementation. - -Julia was created with multiple dispatch as a first-class citizen, it is used -liberally in its ecosystem. C++ does not have access to such a feature natively, -but there are alternatives that I will be presenting in this article, and try to -justify there uses and limitations.
- -[julia-lang]: https://julialang.org/ -[wiki-multiple-dispatch]: https://en.wikipedia.org/wiki/Multiple_dispatch - - -## Single dispatch - -The native way to perform dynamic dispatch in C++ is through the -use of *virtual methods*, which allows an object to *override* the behaviour of -one of its super-classes' method. - -Invoking a virtual method will perform *single dispatch*, on the dynamic type -of the object who's method is being called. - -Here is an example: - -```cpp -struct SpaceObject { - virtual ~SpaceObject() = default; - - // Pure virtual method, which must be overridden by non-abstract sub-classes - virtual void impact() = 0; -}; - -struct Asteroid : SpaceObject { - // Override the method for asteroid impacts - void impact() override { - std::cout << "Bang!\n"; - } -}; - -struct Spaceship : SpaceObject { - // Override the method for spaceship impacts - void impact() override { - std::cout << "Crash!\n"; - } -}; - -int main() { - std::unique_ptr object = std::make_unique(); - object->impact(); // Prints "Crash!" - - object = std::make_unique(); - object->impact(); // Prints "Bang!" -} -``` - -Virtual methods are great when you want to represent a common set of behaviour -(an *interface*), and be able to substitute various types with their specific -implementation. - -For example, a dummy file-system interface might look like the following: - -```cpp -struct Filesystem { - virtual void write(std::string_view filename, std::span data) = 0; - virtual std::vector read(std::string_view filename) = 0; - virtual void delete(std::string_view filename) = 0; -}; -``` - -You can then write `PosixFilesystem` which makes use of the POSIX API and -interact with actual on-disk data, `MockFilesystem` which only works in-memory -and can be used for testing, etc... - -## Double dispatch through the Visitor pattern - -Sometimes single dispatch is not enough, such as in the collision example at the -beginning of this article. 
In cases where a computation depends on the dynamic -type of *two* of its values, we can make use of double-dispatch by leveraging -the Visitor design pattern. This is done by calling a virtual method on the -first value, which itself will call a virtual method on the second value. - -Here's a commentated example: - -```cpp -struct Asteroid; -struct Spaceship; - -struct SpaceObject { - virtual ~SpaceObject() = default; - - // Only used to kick-start the double-dispatch process - virtual void collide_with(SpaceObject& other) = 0; - - // The actual dispatching methods - virtual void collide_with(Asteroid& other) = 0; - virtual void collide_with(Spaceship& other) = 0; -}; - -struct Asteroid : SpaceObject { - void collide_with(SpaceObject& other) override { - // `*this` is an `Asteroid&` which kick-starts the double-dispatch - other.collide_with(*this); - }; - - void collide_with(Asteroid& other) override { /* Asteroid/Asteroid */ }; - void collide_with(Spaceship& other) override { /* Asteroid/Spaceship */ }; -}; - -struct Spaceship : SpaceObject { - void collide_with(SpaceObject& other) override { - // `*this` is a `Spaceship&` which kick-starts the double-dispatch - other.collide_with(*this); - }; - - void collide_with(Asteroid& other) override { /* Spaceship/Asteroid */ }; - void collide_with(Spaceship& other) override { /* Spaceship/Spaceship */ }; -}; - -void collide(SpaceObject& first, SpaceObject& second) { - first.collide_with(second); -}; - -int main() { - auto asteroid = std::make_unique(); - auto spaceship = std::make_unique(); - - collide(*asteroid, *spaceship); - // Calls in order: - // - Asteroid::collide_with(SpaceObject&) - // - Spaceship::collide_with(Asteroid&) - - collide(*spaceship, *asteroid); - // Calls in order: - // - Spaceship::collide_with(SpaceObject&) - // - Asteroid::collide_with(Spaceship&) - - asteroid->collide_with(*spaceship); - // Only calls Asteroid::collide_with(Spaceship&) - - spaceship->collide_with(*asteroid); - // Only calls 
Spaceship::collide_with(Asteroid&) -} -``` - -Double dispatch is pattern is most commonly used with the *visitor pattern*, in -which a closed class hierarchy (the data) is separated from an open class -hierarchy (the algorithms acting on that data). This is especially useful in -e.g: compilers, where the AST class hierarchy represents the data *only*, and -all compiler stages and optimization passes are programmed by a series of -visitors. - -One downside of this approach is that if you want to add `SpaceStation` as -a sub-class of `SpaceObject`, and handle its collisions with other -`SpaceObject`s, you need to: - -* Implement all `collide_with` methods for this new class. -* Add a new virtual method `collide_with(SpaceStation&)` and implement it on - every sub-class. - -This can be inconvenient if your class hierarchy changes often. - -## Multiple dispatch on a closed class hierarchy - -When even double dispatch is not enough, there is a way to do multiple dispatch -in standard C++, included in the STL since C++17. However unlike the previous -methods I showed, this one relies on using [`std::variant`][variant-cppref] and -[`std::visit`][visit-cppref]. - -[variant-cppref]: https://en.cppreference.com/w/cpp/utility/variant -[visit-cppref]: https://en.cppreference.com/w/cpp/utility/variant/visit - -The limitation of `std::variant` is that you are limited to the types you can -select at *compile-time* for the values used during your dispatch operation. -You have a *closed* hierarchy of classes, which is the explicit list of types in -your `variant`. - -Nonetheless, if you can live with that limitation, then you have a great amount -of power available to you. I have used `std::visit` in the past to mimic the -effect of pattern matching. 
- -In this example, I re-create the double-dispatch from the previous section: - -```cpp -// No need to inherit from a `SpaceObject` base class -struct Asteroid {}; -struct Spaceship {}; - -// But the list of possible runtime *must* be enumerated at compile-time -using SpaceObject = std::variant; - -void collide(SpaceObject& first, SpaceObject& second) { - struct CollideDispatch { - void operator()(Asteroid& first, Asteroid& second) { - // Asteroid/Asteroid - } - void operator()(Asteroid& first, Spaceship& second) { - // Asteroid/Spaceship - } - void operator()(Spaceship& first, Asteroid& second) { - // Spaceship/Asteroid - } - void operator()(Spaceship& first, Spaceship& second) { - // Spaceship/Spaceship - } - }; - - std::visit(CollideDispatch(), first, second); -} - -int main() { - SpaceObject asteroid = Asteroid(); - SpaceObject spaceship = Spaceship(); - - collide(asteroid, spaceship); - // Calls CollideDispatch::operator()(Asteroid&, Spaceship&) - - collide(spaceship, asteroid); - // Calls CollideDispatch::operator()(Spaceship&, Asteroid&) -} -``` - -Obviously, the issue with adding a new `SpaceStation` variant is once again -apparent in this implementation. You will get a compile error unless you handle -this new `SpaceStation` variant at every point you `visit` the `SpaceObject`s. - -## The Expression Problem - -One issue we have not been able to move past in these examples is the -[Expression Problem][expression-problem]. In two words, this means that we can't -add a new data type (e.g: `SpaceStation`), or a new operation (e.g: `land_on`) -to our current code without re-compiling it. - -[expression-problem]: https://en.wikipedia.org/wiki/Expression_problem - -This is the downside I was pointing out in our previous sections: - -* Data type extension: one can easily add a new `SpaceObject` child-class in the - OOP version, but needs to modify each implementation if we want to add a new - method to the `SpaceObject` interface to implement a new operation. 
-* Operation extension: one can easily create a new function when using the - `std::variant` based representation, as pattern-matching easily allows us to - only handle the kinds of values we are interested in. But adding a new - `SpaceObject` variant means we need to modify and re-compile every - `std::visit` call to handle the new variant. - -There is currently no (good) way in standard C++ to tackle the Expression -Problem. A paper ([N2216][N2216]) was written to propose a new language feature -to improve the situation. However it looks quite complex, and never got followed -up on for standardization. - -[N2216]: https://open-std.org/jtc1/sc22/wg21/docs/papers/2007/n2216.pdf - -In the meantime, one can find some libraries (like [`yomm2`][yomm2]) that -reduce the amount of boiler-plate needed to emulate this feature. - -[yomm2]: https://github.com/jll63/yomm2 - -```cpp -#include - -struct SpaceObject { - virtual ~SpaceObject() = default; -}; - -struct Asteroid : SpaceObject { /* fields, methods, etc... */ }; - -struct Spaceship : SpaceObject { /* fields, methods, etc... 
*/ }; - -// Register all sub-classes of `SpaceObject` for use with open methods -register_classes(SpaceObject, Asteroid, Spaceship); - -// Register the `collide` open method, which dispatches on two arguments -declare_method(void, collide, (virtual_, virtual_)); - -// Write the different implementations of `collide` -define_method(void, collide, (Asteroid& left, Asteroid& right)) { /* work */ } -define_method(void, collide, (Asteroid& left, Spaceship& right)) { /* work */ } -define_method(void, collide, (Spaceship& left, Asteroid& right)) { /* work */ } -define_method(void, collide, (Spaceship& left, Spaceship& right)) { /* work */ } - - -int main() { - yorel::yomm2::update_methods(); - - auto asteroid = std::make_unique(); - auto spaceship = std::make_unique(); - - collide(*asteroid, *spaceship); // Calls (Asteroid, Spaceship) version - collide(*spaceship, *asteroid); // Calls (Spaceship, Asteroid) version - collide(*asteroid, *asteroid); // Calls (Asteroid, Asteroid) version - collide(*spaceship, *spaceship); // Calls (Spaceship, Spaceship) version -} -``` diff --git a/content/posts/2024-06-24-union-find/index.md b/content/posts/2024-06-24-union-find/index.md deleted file mode 100644 index dfe4c68..0000000 --- a/content/posts/2024-06-24-union-find/index.md +++ /dev/null @@ -1,157 +0,0 @@ ---- -title: "Union Find" -date: 2024-06-24T21:07:49+01:00 -draft: false # I don't care for draft mode, git has branches for that -description: "My favorite data structure" -tags: - - algorithms - - data structures - - python -categories: - - programming -series: - - Cool algorithms -favorite: false -disable_feed: false ---- - -To kickoff the [series]({{< ref "/series/cool-algorithms/" >}}) of posts about -algorithms and data structures I find interesting, I will be talking about my -favorite one: the [_Disjoint Set_][wiki]. Also known as the _Union-Find_ data -structure, so named because of its two main operations: `ds.union(lhs, rhs)` and -`ds.find(elem)`. 
- -[wiki]: https://en.wikipedia.org/wiki/Disjoint-set_data_structure - - - -## What does it do? - -The _Union-Find_ data structure allows one to store a collection of sets of -elements, with operations for adding new sets, merging two sets into one, and -finding the representative member of a set. Not only does it do all that, but it -does it in almost constant (amortized) time! - -Here is a small motivating example for using the _Disjoint Set_ data structure: - -```python -def connected_components(graph: Graph) -> list[set[Node]]: - # Initialize the disjoint set so that each node is in its own set - ds: DisjointSet[Node] = DisjointSet(graph.nodes) - # Each edge is a connection, merge both sides into the same set - for (start, dest) in graph.edges: - ds.union(start, dest) - # Connected components share the same (arbitrary) root - components: dict[Node, set[Node]] = defaultdict(set) - for n in graph.nodes: - components[ds.find(n)].add(n) - # Return a list of disjoint sets corresponding to each connected component - return list(components.values()) -``` - -## Implementation - -I will show how to implement `UnionFind` for integers, though it can easily be -extended to be used with arbitrary types (e.g: by mapping each element -one-to-one to a distinct integer, or using a different set representation). - -### Representation - -Creating a new disjoint set is easy enough: - -```python -class UnionFind: - _parent: list[int] - _rank: list[int] - - def __init__(self, size: int): - # Each node is in its own set, making it its own parent... - self._parents = list(range(size)) - # ... And its rank 0 - self._rank = [0] * size -``` - -We represent each set through the `_parent` field: each element of the set is -linked to its parent, until the root node which is its own parent. When first -initializing the structure, each element is in its own set, so we initialize -each element to be a root and make it its own parent (`_parent[i] == i` for all -`i`). 
- -The `_rank` field is an optimization which we will touch on in a later section. - -### Find - -A naive Implementation of `find(...)` is simple enough to write: - -```python -def find(self, elem: int) -> int: - # If `elem` is its own parent, then it is the root of the tree - if (parent := self._parent[elem]) == elem: - return elem - # Otherwise, recurse on the parent - return self.find(parent) -``` - -However, going back up the chain of parents each time we want to find the root -node (an `O(n)` operation) would make for disastrous performance. Instead we can -do a small optimization called _path splitting_. - -```python -def find(self, elem: int) -> int: - while (parent := self._parent[elem]) != elem: - # Replace each parent link by a link to the grand-parent - elem, self._parent[elem] = parent, self._parent[parent] - return elem -``` - -This flattens the chain so that each node links more directly to the root (the -length is reduced by half), making each subsequent `find(...)` faster. - -Other compression schemes exist, along the spectrum between faster shortening -the chain faster earlier, or updating `_parent` fewer times per `find(...)`. - -### Union - -A naive implementation of `union(...)` is simple enough to write: - -```python -def union(self, lhs: int, rhs: int) -> int: - # Replace both element by their root parent - lhs = self.find(lhs) - rhs = self.find(rhs) - # arbitrarily merge one into the other - self._parent[rhs] = lhs - # Return the new root - return lhs -``` - -Once again, improvements can be made. Depending on the order in which we call -`union(...)`, we might end up creating a long chain from the leaf of the tree to -the root node, leading to slower `find(...)` operations. If at all possible, we -would like to keep the trees as shallow as possible. - -To do so, we want to avoid merging taller trees into smaller ones, so as to keep -them as balanced as possible. 
Since a higher tree will result in a slower -`find(...)`, keeping the trees balanced will lead to increased performance. - -This is where the `_rank` field we mentioned earlier comes in: the _rank_ of an -element is an upper bound on its height in the tree. By keeping track of this -_approximate_ height, we can keep the trees balanced when merging them. - -```python -def union(self, lhs: int, rhs: int) -> int: - lhs = self.find(lhs) - rhs = self.find(rhs) - # Bail out early if they already belong to the same set - if lhs == rhs: - return lhs - # Always keep `lhs` as the taller tree - if (self._rank[lhs] < self._rank[rhs]) - lhs, rhs = rhs, lhs - # Merge the smaller tree into the taller one - self._parent[rhs] = lhs - # Update the rank when merging trees of approximately the same size - if self._rank[lhs] == self._rank[rhs]: - self._rank[lhs] += 1 - return lhs -``` diff --git a/content/posts/2024-06-30-trie/index.md b/content/posts/2024-06-30-trie/index.md deleted file mode 100644 index aef49e3..0000000 --- a/content/posts/2024-06-30-trie/index.md +++ /dev/null @@ -1,171 +0,0 @@ ---- -title: "Trie" -date: 2024-06-30T11:07:49+01:00 -draft: false # I don't care for draft mode, git has branches for that -description: "A cool map" -tags: - - algorithms - - data structures - - python -categories: - - programming -series: - - Cool algorithms -favorite: false -disable_feed: false ---- - -This time, let's talk about the [_Trie_][wiki], which is a tree-based mapping -structure most often used for string keys. - -[wiki]: https://en.wikipedia.org/wiki/Trie - - - -## What does it do? - -A _Trie_ can be used to map a set of string keys to their corresponding values, -without the need for a hash function. This also means you won't suffer from hash -collisions, though the tree-based structure will probably translate to slower -performance than a good hash table. 
- -A _Trie_ is especially useful to represent a dictionary of words in the case of -spell correction, as it can easily be used to fuzzy match words under a given -edit distance (think [Levenshtein distance]) - -[Levenshtein distance]: https://en.wikipedia.org/wiki/Levenshtein_distance - -## Implementation - -This implementation will be in Python for exposition purposes, even though -it already has a built-in `dict`. - -### Representation - -Creating a new `Trie` is easy: the root node starts off empty and without any -mapped values. - -```python -class Trie[T]: - _children: dict[str, Trie[T]] - _value: T | None - - def __init__(self): - # Each letter is mapped to a Trie - self._children = defaultdict(Trie) - # If we match a full string, we store the mapped value - self._value = None -``` - -We're using a `defaultdict` for the children for ease of implementation in this -post. In reality, I would encourage you exit early when you can't match a given -character. - -The string key will be implicit by the position of a node in the tree: the empty -string at the root, one-character strings as its direct children, etc... - -### Search - -An exact match look-up is easily done: we go down the tree until we've exhausted -the key. At that point we've either found a mapped value or not. - -```python -def get(self, key: str) -> T | None: - # Have we matched the full key? - if not key: - # Store the `T` if mapped, `None` otherwise - return self._value - # Otherwise, recurse on the child corresponding to the first letter - return self._children[key[0]].get(key[1:]) -``` - -### Insertion - -Adding a new value to the _Trie_ is similar to a key lookup, only this time we -store the new value instead of returning it. - -```python -def insert(self, key: str, value: T) -> bool: - # Have we matched the full key? 
- if not key: - # Check whether we're overwriting a previous mapping - was_mapped = self._value is None - # Store the corresponding value - self._value = value - # Return whether we've performed an overwrite - return was_mapped - # Otherwise, recurse on the child corresponding to the first letter - return self._children[key[0]].insert(key[1:], value) -``` - -### Removal - -Removal should also look familiar. - -```python -def remove(self, key: str) -> bool: - # Have we matched the full key? - if not key: - was_mapped = self._value is None - # Remove the value - self._value = None - # Return whether it was mapped - return was_mapped - # Otherwise, recurse on the child corresponding to the first letter - return self._children[key[0]].remove(key[1:]) -``` - -### Fuzzy matching - -Fuzzily matching a given word is where the real difficulty is: the key is to -realize we can use the prefix-tree nature of a _Trie_ to avoid doing wasteful -work. - -By leveraging the prefix visit order of the tree, we can build an iterative -Levenshtein distance matrix, in much the same way one would do so in its -[Dynamic Programming] implementation (see the [Wagner-Fisher algorithm]). 
- -[Dynamic Programming]: https://en.wikipedia.org/wiki/Dynamic_programming -[Wagner-Fisher algorithm]: https://en.wikipedia.org/wiki/Wagner%E2%80%93Fischer_algorithm - -```python -class FuzzyResult[T](NamedTuple): - distance: int - key: str - value: T - - -def get_fuzzy(self, key: str, max_distance: int = 0) -> Iterator[FuzzyResult[T]]: - def helper( - current_word: str, - node: Trie[T], - previous_row: list[int], - ) -> Iterator[tuple[int, T]]: - # Iterative Levenshtein - current_row = [previous_row[0] + 1] - current_char = current_word[-1] - for column, key_char in enumerate(key, start=1): - insertion = current_row[column - 1] + 1 - deletion = previous_row[column] + 1 - replacement = previous_row[column - 1] + (key_char != current_char) - current_row.append(min(insertion, deletion, replacement)) - - # If we are under the max distance, match this node - if (distance := current_row[-1]) <= max_distance and node._value != None: - # Only if it has a value of course - yield FuzzyResult(distance, current_word, node._value) - - # If we can potentially still match children, recurse - if min(current_row) <= max_distance: - for c, child in node._children.items(): - yield from helper(current_word + c, child, current_row) - - # Build the first row -- the edit distance from the empty string - row = list(range(len(key) + 1)) - - # Base case for the empty string - if (distance := row[-1]) <= max_distance and self._value != None: - yield FuzzyResult(distance, "", self._value) - for c, child in self._children.items(): - yield from helper(c, child, row) -``` diff --git a/content/posts/2024-07-06-gap-buffer/index.md b/content/posts/2024-07-06-gap-buffer/index.md deleted file mode 100644 index 0bb3d54..0000000 --- a/content/posts/2024-07-06-gap-buffer/index.md +++ /dev/null @@ -1,191 +0,0 @@ ---- -title: "Gap Buffer" -date: 2024-07-06T21:27:19+01:00 -draft: false # I don't care for draft mode, git has branches for that -description: "As featured in GNU Emacs" -tags: - - algorithms 
- - data structures - - python -categories: - - programming -series: - - Cool algorithms -favorite: false -disable_feed: false ---- - -The [_Gap Buffer_][wiki] is a popular data structure for text editors to -represent files and editable buffers. The most famous of them is probably -[GNU Emacs][emacs]. - -[wiki]: https://en.wikipedia.org/wiki/Gap_buffer -[emacs]: https://www.gnu.org/software/emacs/manual/html_node/elisp/Buffer-Gap.html - - - -## What does it do? - -A _Gap Buffer_ is simply a list of characters, similar to a normal string, with -the added twist of splitting it into two sides: the prefix and suffix, on either -side of the cursor. In between them, a gap is left to allow for quick -insertion at the cursor. - -Moving the cursor moves the gap around the buffer, the prefix and suffix getting -shorter/longer as required. - -## Implementation - -I'll be writing a sample implementation in Python, as with the rest of the -[series]({{< ref "/series/cool-algorithms/" >}}). I don't think it showcases the -elegance of the _Gap Buffer_ in action like a C implementation full of -`memmove`s would, but it does make it short and sweet. - -### Representation - -We'll be representing the gap buffer as an actual list of characters. - -Given that Python doesn't _have_ characters, let's settle for a list of strings, -each representing a single character... - -```python -Char = str - -class GapBuffer: - # List of characters, contains prefix and suffix of string with gap in the middle - _buf: list[Char] - # The gap is contained between [start, end) (i.e: buf[start:end]) - _gap_start: int - _gap_end: int - - # Visual representation of the gap buffer: - # This is a very [ ]long string. 
- # |<----------------------------------------------->| capacity - # |<------------>| |<-------->| string - # |<------------------->| gap - # |<------------>| prefix - # |<-------->| suffix - def __init__(self, initial_capacity: int = 16) -> None: - assert initial_capacity > 0 - # Initialize an empty gap buffer - self._buf = [""] * initial_capacity - self._gap_start = 0 - self._gap_end = initial_capacity -``` - -### Accessors - -I'm mostly adding these for exposition, and making it easier to write `assert`s -later. - -```python -@property -def capacity(self) -> int: - return len(self._buf) - -@property -def gap_length(self) -> int: - return self._gap_end - self._gap_start - -@property -def string_length(self) -> int: - return self.capacity - self.gap_length - -@property -def prefix_length(self) -> int: - return self._gap_start - -@property -def suffix_length(self) -> int: - return self.capacity - self._gap_end -``` - -### Growing the buffer - -I've written this method in a somewhat non-idiomatic manner, to make it closer -to how it would look in C using `realloc` instead. - -It would be more efficient to use slicing to insert the needed extra capacity -directly, instead of making a new buffer and copying characters over. - -```python -def grow(self, capacity: int) -> None: - assert capacity >= self.capacity - # Create a new buffer with the new capacity - new_buf = [""] * capacity - # Move the prefix/suffix to their place in the new buffer - added_capacity = capacity - len(self._buf) - new_buf[: self._gap_start] = self._buf[: self._gap_start] - new_buf[self._gap_end + added_capacity :] = self._buf[self._gap_end :] - # Use the new buffer, account for added capacity - self._buf = new_buf - self._gap_end += added_capacity -``` - -### Insertion - -Inserting text at the cursor's position means filling up the gap in the middle -of the buffer. To do so we must first make sure that the gap is big enough, or -grow the buffer accordingly. 
- -Then inserting the text is simply a matter of copying its characters in place, -and moving the start of the gap further right. - -```python -def insert(self, val: str) -> None: - # Ensure we have enough space to insert the whole string - if len(val) > self.gap_length: - self.grow(max(self.capacity * 2, self.string_length + len(val))) - # Fill the gap with the given string - self._buf[self._gap_start : self._gap_start + len(val)] = val - self._gap_start += len(val) -``` - -### Deletion - -Removing text from the buffer simply expands the gap in the corresponding -direction, shortening the string's prefix/suffix. This makes it very cheap. - -The methods are named after the `backspace` and `delete` keys on the keyboard. - -```python -def backspace(self, dist: int = 1) -> None: - assert dist <= self.prefix_length - # Extend gap to the left - self._gap_start -= dist - -def delete(self, dist: int = 1) -> None: - assert dist <= self.suffix_length - # Extend gap to the right - self._gap_end += dist -``` - -### Moving the cursor - -Moving the cursor along the buffer will shift letters from one side of the gap -to the other, moving them across from prefix to suffix and back. - -I find Python's list slicing not quite as elegant to read as a `memmove`, though -it does make for a very small and efficient implementation. 
- -```python -def left(self, dist: int = 1) -> None: - assert dist <= self.prefix_length - # Shift the needed number of characters from end of prefix to start of suffix - self._buf[self._gap_end - dist : self._gap_end] = self._buf[ - self._gap_start - dist : self._gap_start - ] - # Adjust indices accordingly - self._gap_start -= dist - self._gap_end -= dist - -def right(self, dist: int = 1) -> None: - assert dist <= self.suffix_length - # Shift the needed number of characters from start of suffix to end of prefix - self._buf[self._gap_start : self._gap_start + dist] = self._buf[ - self._gap_end : self._gap_end + dist - ] - # Adjust indices accordingly - self._gap_start += dist - self._gap_end += dist -``` diff --git a/content/posts/2024-07-14-bloom-filter/index.md b/content/posts/2024-07-14-bloom-filter/index.md deleted file mode 100644 index 86aca41..0000000 --- a/content/posts/2024-07-14-bloom-filter/index.md +++ /dev/null @@ -1,97 +0,0 @@ ---- -title: "Bloom Filter" -date: 2024-07-14T17:46:40+01:00 -draft: false # I don't care for draft mode, git has branches for that -description: "Probably cool" -tags: - - algorithms - - data structures - - python -categories: - - programming -series: - - Cool algorithms -favorite: false -disable_feed: false ---- - -The [_Bloom Filter_][wiki] is a probabilistic data structure for set membership. - -The filter can be used as an inexpensive first step when querying the actual -data is quite costly (e.g: as a first check for expensive cache lookups or large -data seeks). - -[wiki]: https://en.wikipedia.org/wiki/Bloom_filter - - - -## What does it do? - -A _Bloom Filter_ can be understood as a hash-set which can either tell you: - -* An element is _not_ part of the set. -* An element _may be_ part of the set. - -More specifically, one can tweak the parameters of the filter to make it so that -the _false positive_ rate of membership is quite low. 
- -I won't be going into those calculations here, but they are quite trivial to -compute, or one can just look up appropriate values for their use case. - -## Implementation - -I'll be using Python, which has the nifty ability of representing bitsets -through its built-in big integers quite easily. - -We'll be assuming a `BIT_COUNT` of 64 here, but the implementation can easily be -tweaked to use a different number, or even change it at construction time. - -### Representation - -A `BloomFilter` is just a set of bits and a list of hash functions. - -```python -BIT_COUNT = 64 - -class BloomFilter[T]: - _bits: int - _hash_functions: list[Callable[[T], int]] - - def __init__(self, hash_functions: list[Callable[[T], int]]) -> None: - # Filter is initially empty - self._bits = 0 - self._hash_functions = hash_functions -``` - -### Inserting a key - -To add an element to the filter, we take the output from each hash function and -use that to set a bit in the filter. This combination of bits will identify the -element, which we can use for lookup later. - -```python -def insert(self, val: T) -> None: - # Iterate over each hash - for f in self._hash_functions: - n = f(val) % BIT_COUNT - # Set the corresponding bit - self._bits |= 1 << n -``` - -### Querying a key - -Because the _Bloom Filter_ does not actually store its elements, but some -derived data from hashing them, it can only definitely say if an element _does -not_ belong to it. Otherwise, it _may_ be part of the set, and should be checked -against the actual underlying store. 
- -```python -def may_contain(self, val: T) -> bool: - for f in self._hash_functions: - n = f(val) % BIT_COUNT - # If one of the bits is unset, the value is definitely not present - if not (self._bits & (1 << n)): - return False - # All bits were matched, `val` is likely to be part of the set - return True -``` diff --git a/content/posts/2024-07-20-treap/index.md b/content/posts/2024-07-20-treap/index.md deleted file mode 100644 index 431e68b..0000000 --- a/content/posts/2024-07-20-treap/index.md +++ /dev/null @@ -1,159 +0,0 @@ ---- -title: "Treap" -date: 2024-07-20T14:12:27+01:00 -draft: false # I don't care for draft mode, git has branches for that -description: "A simpler BST" -tags: - - algorithms - - data structures - - python -categories: - - programming -series: - - Cool algorithms -favorite: false -disable_feed: false -graphviz: true ---- - -The [_Treap_][wiki] is a mix between a _Binary Search Tree_ and a _Heap_. - -Like a _Binary Search Tree_, it keeps an ordered set of keys in the shape of a -tree, allowing for binary search traversal. - -Like a _Heap_, it associates each node with a priority, making sure that a -parent's priority is always higher than that of any of its children. - -[wiki]: https://en.wikipedia.org/wiki/Treap - - - -## What does it do? - -By randomizing the priority value of each key at insertion time, we ensure a -high likelihood that the tree stays _roughly_ balanced, avoiding degenerating to -unbalanced O(N) height. - -Here's a sample tree created by inserting integers from 0 to 249 into the tree: - -{{< graphviz file="treap.gv" />}} - -## Implementation - -I'll be keeping the theme for this [series] by using Python to implement the -_Treap_. This leads to somewhat annoying code to handle the rotation process, -which is easier to do in C using pointers. - -[series]: {{< ref "/series/cool-algorithms/" >}} - -### Representation - -Creating a new `Treap` is easy: the tree starts off empty, waiting for new nodes -to insert. 
- -Each `Node` must keep track of the `key`, the mapped `value`, and the node's -`priority` (which is assigned randomly). Finally, it must also allow for storing -two children (`left` and `right`). - -```python -class Node[K, V]: - key: K - value: V - priority: float - left: Node[K, V] | None - right: Node[K, V] | None - - def __init__(self, key: K, value: V): - # Store key and value, like a normal BST node - self.key = key - self.value = value - # Priority is derived randomly - self.priority = random() - self.left = None - self.right = None - -class Treap[K, V]: - _root: Node[K, V] | None - - def __init__(self): - # The tree starts out empty - self._root = None -``` - -### Search - -Searching the tree is the same as in any other _Binary Search Tree_. - -```python -def get(self, key: K) -> V | None: - node = self._root - # The usual BST traversal - while node is not None: - if node.key == key: - return node.value - elif node.key < key: - node = node.right - else: - node = node.left - return None -``` - -### Insertion - -To insert a new `key` into the tree, we identify which leaf position it should -be inserted at. We then generate the node's priority, insert it at this -position, and rotate the node upwards until the heap property is respected. 
- -```python -type ChildField = Literal["left", "right"] - -def insert(self, key: K, value: V) -> bool: - # Empty treap base-case - if self._root is None: - self._root = Node(key, value) - # Signal that we're not overwriting the value - return False - # Keep track of the parent chain for rotation after insertion - parents = [] - node = self._root - while node is not None: - # Insert a pre-existing key - if node.key == key: - node.value = value - return True - # Go down the tree, keep track of the path through the tree - field = "left" if key < node.key else "right" - parents.append((node, field)) - node = getattr(node, field) - # Key wasn't found, we're inserting a new node - child = Node(key, value) - parent, field = parents[-1] - setattr(parent, field, child) - # Rotate the new node up until we respect the decreasing priority property - self._rotate_up(child, parents) - # Key wasn't found, signal that we inserted a new node - return False - -def _rotate_up( - self, - node: Node[K, V], - parents: list[tuple[Node[K, V], ChildField]], -) -> None: - while parents: - parent, field = parents.pop() - # If the parent has higher priority, we're done rotating - if parent.priority >= node.priority: - break - # Check for grand-parent/root of tree edge-case - if parents: - # Update grand-parent to point to the new rotated node - grand_parent, grand_field = parents[-1] - setattr(grand_parent, grand_field, node) - else: - # Point the root to the new rotated node - self._root = node - other_field = "left" if field == "right" else "right" - # Rotate the node up - setattr(parent, field, getattr(node, other_field)) - setattr(node, other_field, parent) -``` diff --git a/content/posts/2024-07-20-treap/treap.gv b/content/posts/2024-07-20-treap/treap.gv deleted file mode 100644 index 156eff9..0000000 --- a/content/posts/2024-07-20-treap/treap.gv +++ /dev/null @@ -1,1004 +0,0 @@ -graph { - node [shape=circle] - 2[label=<2
0.9957869495270144>]; - 2 -- 1 [side=L] - 1[label=<1
0.8476937427586738>]; - 1 -- 0 [side=L] - 0[label=<0
0.7514570833207395>]; - 2 -- 145 [side=R] - 145[label=<145
0.9939888728473314>]; - 145 -- 12 [side=L] - 12[label=<12
0.9907319285760915>]; - 12 -- 5 [side=L] - 5[label=<5
0.988082370881665>]; - 5 -- 4 [side=L] - 4[label=<4
0.7422137887388395>]; - 4 -- 3 [side=L] - 3[label=<3
0.24440218394323976>]; - 5 -- 8 [side=R] - 8[label=<8
0.695547503791194>]; - 8 -- 6 [side=L] - 6[label=<6
0.5664414792316717>]; - 6 -- 7 [side=R] - 7[label=<7
0.11672897928969816>]; - 8 -- 11 [side=R] - 11[label=<11
0.45404068759973704>]; - 11 -- 9 [side=L] - 9[label=<9
0.3325186970255505>]; - 9 -- 10 [side=R] - 10[label=<10
0.10578109016044712>]; - 12 -- 47 [side=R] - 47[label=<47
0.9904839559513964>]; - 47 -- 33 [side=L] - 33[label=<33
0.9758188259688814>]; - 33 -- 28 [side=L] - 28[label=<28
0.915671870648212>]; - 28 -- 23 [side=L] - 23[label=<23
0.8487360320264957>]; - 23 -- 20 [side=L] - 20[label=<20
0.7063809958484802>]; - 20 -- 16 [side=L] - 16[label=<16
0.7034299359160114>]; - 16 -- 15 [side=L] - 15[label=<15
0.5922634040068163>]; - 15 -- 13 [side=L] - 13[label=<13
0.4543498778672278>]; - 13 -- 14 [side=R] - 14[label=<14
0.21605966187340298>]; - 16 -- 17 [side=R] - 17[label=<17
0.3709284358038115>]; - 17 -- 18 [side=R] - 18[label=<18
0.260599350868457>]; - 18 -- 19 [side=R] - 19[label=<19
0.1817886048339078>]; - 20 -- 21 [side=R] - 21[label=<21
0.14210053829804103>]; - 21 -- 22 [side=R] - 22[label=<22
0.08869917426783513>]; - 23 -- 25 [side=R] - 25[label=<25
0.4892716208875628>]; - 25 -- 24 [side=L] - 24[label=<24
0.25363061372658946>]; - 25 -- 26 [side=R] - 26[label=<26
0.13796660931463622>]; - 26 -- 27 [side=R] - 27[label=<27
0.12997273552181765>]; - 28 -- 31 [side=R] - 31[label=<31
0.8601319422972336>]; - 31 -- 29 [side=L] - 29[label=<29
0.8084698172467167>]; - 29 -- 30 [side=R] - 30[label=<30
0.27908621178937176>]; - 31 -- 32 [side=R] - 32[label=<32
0.0844421977476495>]; - 33 -- 35 [side=R] - 35[label=<35
0.967049096465691>]; - 35 -- 34 [side=L] - 34[label=<34
0.12377873295631248>]; - 35 -- 39 [side=R] - 39[label=<39
0.9328169604588316>]; - 39 -- 38 [side=L] - 38[label=<38
0.7787222122657034>]; - 38 -- 37 [side=L] - 37[label=<37
0.4043690842717128>]; - 37 -- 36 [side=L] - 36[label=<36
0.3046740248996985>]; - 39 -- 43 [side=R] - 43[label=<43
0.8350649185763811>]; - 43 -- 40 [side=L] - 40[label=<40
0.5406537578359348>]; - 40 -- 42 [side=R] - 42[label=<42
0.3193437618727414>]; - 42 -- 41 [side=L] - 41[label=<41
0.27788504363715394>]; - 43 -- 44 [side=R] - 44[label=<44
0.4830145820991081>]; - 44 -- 46 [side=R] - 46[label=<46
0.33052118143151343>]; - 46 -- 45 [side=L] - 45[label=<45
0.29551968300055065>]; - 47 -- 114 [side=R] - 114[label=<114
0.9876511078116038>]; - 114 -- 86 [side=L] - 86[label=<86
0.9735379804960496>]; - 86 -- 77 [side=L] - 77[label=<77
0.9269342780771458>]; - 77 -- 68 [side=L] - 68[label=<68
0.9230582358699485>]; - 68 -- 55 [side=L] - 55[label=<55
0.839490204679298>]; - 55 -- 51 [side=L] - 51[label=<51
0.3385715557270519>]; - 51 -- 50 [side=L] - 50[label=<50
0.3360608605201836>]; - 50 -- 49 [side=L] - 49[label=<49
0.2774445142713422>]; - 49 -- 48 [side=L] - 48[label=<48
0.17489390663513982>]; - 51 -- 52 [side=R] - 52[label=<52
0.1886079446757799>]; - 52 -- 54 [side=R] - 54[label=<54
0.12293678964136756>]; - 54 -- 53 [side=L] - 53[label=<53
0.08266549169366844>]; - 55 -- 64 [side=R] - 64[label=<64
0.5480541758673416>]; - 64 -- 61 [side=L] - 61[label=<61
0.4333510895474586>]; - 61 -- 58 [side=L] - 58[label=<58
0.22965679673111572>]; - 58 -- 57 [side=L] - 57[label=<57
0.1750392554452569>]; - 57 -- 56 [side=L] - 56[label=<56
0.1154682181055644>]; - 58 -- 60 [side=R] - 60[label=<60
0.19505416228675265>]; - 60 -- 59 [side=L] - 59[label=<59
0.15628574332026035>]; - 61 -- 63 [side=R] - 63[label=<63
0.23625159275800722>]; - 63 -- 62 [side=L] - 62[label=<62
0.10778840213677576>]; - 64 -- 66 [side=R] - 66[label=<66
0.4795221664420316>]; - 66 -- 65 [side=L] - 65[label=<65
0.29689544767487486>]; - 66 -- 67 [side=R] - 67[label=<67
0.1446107813011439>]; - 68 -- 75 [side=R] - 75[label=<75
0.9136368186744673>]; - 75 -- 73 [side=L] - 73[label=<73
0.8180762618516633>]; - 73 -- 72 [side=L] - 72[label=<72
0.790052528538962>]; - 72 -- 69 [side=L] - 69[label=<69
0.6662534331501744>]; - 69 -- 70 [side=R] - 70[label=<70
0.198170103856931>]; - 70 -- 71 [side=R] - 71[label=<71
0.09798170568469355>]; - 73 -- 74 [side=R] - 74[label=<74
0.3113326781632004>]; - 75 -- 76 [side=R] - 76[label=<76
0.878956270284837>]; - 77 -- 84 [side=R] - 84[label=<84
0.7298827990732385>]; - 84 -- 81 [side=L] - 81[label=<81
0.7294243899593819>]; - 81 -- 78 [side=L] - 78[label=<78
0.6751780813023192>]; - 78 -- 80 [side=R] - 80[label=<80
0.2765792284019555>]; - 80 -- 79 [side=L] - 79[label=<79
0.12189576635143229>]; - 81 -- 83 [side=R] - 83[label=<83
0.5986915452921237>]; - 83 -- 82 [side=L] - 82[label=<82
0.5846092825823913>]; - 84 -- 85 [side=R] - 85[label=<85
0.5030186847252209>]; - 86 -- 103 [side=R] - 103[label=<103
0.968253245483458>]; - 103 -- 88 [side=L] - 88[label=<88
0.9609670084738324>]; - 88 -- 87 [side=L] - 87[label=<87
0.5446892558733331>]; - 88 -- 97 [side=R] - 97[label=<97
0.9483161815322799>]; - 97 -- 92 [side=L] - 92[label=<92
0.9417148997777522>]; - 92 -- 89 [side=L] - 89[label=<89
0.2455511319571373>]; - 89 -- 90 [side=R] - 90[label=<90
0.15647633602612276>]; - 90 -- 91 [side=R] - 91[label=<91
0.13997459061178652>]; - 92 -- 93 [side=R] - 93[label=<93
0.7804508194274744>]; - 93 -- 95 [side=R] - 95[label=<95
0.7226672256819942>]; - 95 -- 94 [side=L] - 94[label=<94
0.43503881905350683>]; - 95 -- 96 [side=R] - 96[label=<96
0.40712673636645136>]; - 97 -- 101 [side=R] - 101[label=<101
0.9255359833317219>]; - 101 -- 99 [side=L] - 99[label=<99
0.7139348283525223>]; - 99 -- 98 [side=L] - 98[label=<98
0.21279150783985867>]; - 99 -- 100 [side=R] - 100[label=<100
0.48002453571456083>]; - 101 -- 102 [side=R] - 102[label=<102
0.6219674393805691>]; - 103 -- 106 [side=R] - 106[label=<106
0.8731379458552129>]; - 106 -- 104 [side=L] - 104[label=<104
0.5400277589431796>]; - 104 -- 105 [side=R] - 105[label=<105
0.0007891566377928871>]; - 106 -- 110 [side=R] - 110[label=<110
0.8382929650437165>]; - 110 -- 109 [side=L] - 109[label=<109
0.4615149406437542>]; - 109 -- 107 [side=L] - 107[label=<107
0.43337904959817486>]; - 107 -- 108 [side=R] - 108[label=<108
0.053658620013432023>]; - 110 -- 112 [side=R] - 112[label=<112
0.5679810533328041>]; - 112 -- 111 [side=L] - 111[label=<111
0.541018763444613>]; - 112 -- 113 [side=R] - 113[label=<113
0.42841646768139463>]; - 114 -- 123 [side=R] - 123[label=<123
0.9464643826259924>]; - 123 -- 119 [side=L] - 119[label=<119
0.8662877311047388>]; - 119 -- 116 [side=L] - 116[label=<116
0.7670170824076343>]; - 116 -- 115 [side=L] - 115[label=<115
0.17190031141596263>]; - 116 -- 118 [side=R] - 118[label=<118
0.3671674221014496>]; - 118 -- 117 [side=L] - 117[label=<117
0.2159590427591035>]; - 119 -- 121 [side=R] - 121[label=<121
0.74738638353363>]; - 121 -- 120 [side=L] - 120[label=<120
0.08665916013292596>]; - 121 -- 122 [side=R] - 122[label=<122
0.4119543486009871>]; - 123 -- 143 [side=R] - 143[label=<143
0.9247442715890171>]; - 143 -- 140 [side=L] - 140[label=<140
0.8936337545064225>]; - 140 -- 125 [side=L] - 125[label=<125
0.8517904638602968>]; - 125 -- 124 [side=L] - 124[label=<124
0.681815353261336>]; - 125 -- 126 [side=R] - 126[label=<126
0.7946927108945189>]; - 126 -- 133 [side=R] - 133[label=<133
0.7289879153749418>]; - 133 -- 128 [side=L] - 128[label=<128
0.7256302169867033>]; - 128 -- 127 [side=L] - 127[label=<127
0.6200343851508556>]; - 128 -- 131 [side=R] - 131[label=<131
0.7089639682237622>]; - 131 -- 129 [side=L] - 129[label=<129
0.5255088944172916>]; - 129 -- 130 [side=R] - 130[label=<130
0.41550021924797953>]; - 131 -- 132 [side=R] - 132[label=<132
0.1851371011016204>]; - 133 -- 134 [side=R] - 134[label=<134
0.6333388625482715>]; - 134 -- 135 [side=R] - 135[label=<135
0.4707182839428111>]; - 135 -- 138 [side=R] - 138[label=<138
0.4128350639522185>]; - 138 -- 137 [side=L] - 137[label=<137
0.25475301393491523>]; - 137 -- 136 [side=L] - 136[label=<136
0.04841095612377411>]; - 138 -- 139 [side=R] - 139[label=<139
0.3855980374729622>]; - 140 -- 141 [side=R] - 141[label=<141
0.8655582249610275>]; - 141 -- 142 [side=R] - 142[label=<142
0.2288591915235536>]; - 143 -- 144 [side=R] - 144[label=<144
0.36710686055015185>]; - 145 -- 235 [side=R] - 235[label=<235
0.9812914040931304>]; - 235 -- 169 [side=L] - 169[label=<169
0.9782585107854103>]; - 169 -- 148 [side=L] - 148[label=<148
0.9449023406787344>]; - 148 -- 147 [side=L] - 147[label=<147
0.7293740098286456>]; - 147 -- 146 [side=L] - 146[label=<146
0.509027051210447>]; - 148 -- 150 [side=R] - 150[label=<150
0.9020436951014624>]; - 150 -- 149 [side=L] - 149[label=<149
0.8190200516813236>]; - 150 -- 153 [side=R] - 153[label=<153
0.8855777631108436>]; - 153 -- 152 [side=L] - 152[label=<152
0.8603376027563862>]; - 152 -- 151 [side=L] - 151[label=<151
0.45459695989529647>]; - 153 -- 167 [side=R] - 167[label=<167
0.8492899523613442>]; - 167 -- 163 [side=L] - 163[label=<163
0.8177575678069656>]; - 163 -- 160 [side=L] - 160[label=<160
0.7759852430212377>]; - 160 -- 158 [side=L] - 158[label=<158
0.734924251943539>]; - 158 -- 156 [side=L] - 156[label=<156
0.710110660084087>]; - 156 -- 155 [side=L] - 155[label=<155
0.2959097323390557>]; - 155 -- 154 [side=L] - 154[label=<154
0.23060113716089548>]; - 156 -- 157 [side=R] - 157[label=<157
0.04398748527099361>]; - 158 -- 159 [side=R] - 159[label=<159
0.09047000528918914>]; - 160 -- 162 [side=R] - 162[label=<162
0.7216878041963218>]; - 162 -- 161 [side=L] - 161[label=<161
0.6871202472393689>]; - 163 -- 165 [side=R] - 165[label=<165
0.3886140063494328>]; - 165 -- 164 [side=L] - 164[label=<164
0.3870484684485387>]; - 165 -- 166 [side=R] - 166[label=<166
0.3476475192762013>]; - 167 -- 168 [side=R] - 168[label=<168
0.8169397159175148>]; - 169 -- 172 [side=R] - 172[label=<172
0.9747928133504129>]; - 172 -- 170 [side=L] - 170[label=<170
0.846452976104617>]; - 170 -- 171 [side=R] - 171[label=<171
0.3121690933042578>]; - 172 -- 230 [side=R] - 230[label=<230
0.958355558453681>]; - 230 -- 187 [side=L] - 187[label=<187
0.957557676245447>]; - 187 -- 180 [side=L] - 180[label=<180
0.92710438449752>]; - 180 -- 174 [side=L] - 174[label=<174
0.7049973940003331>]; - 174 -- 173 [side=L] - 173[label=<173
0.152023809588824>]; - 174 -- 179 [side=R] - 179[label=<179
0.6715227532708304>]; - 179 -- 176 [side=L] - 176[label=<176
0.6621862385446002>]; - 176 -- 175 [side=L] - 175[label=<175
0.1248086920689816>]; - 176 -- 178 [side=R] - 178[label=<178
0.5713418767827186>]; - 178 -- 177 [side=L] - 177[label=<177
0.5560171098718876>]; - 180 -- 183 [side=R] - 183[label=<183
0.9250644230760526>]; - 183 -- 182 [side=L] - 182[label=<182
0.6824354777265715>]; - 182 -- 181 [side=L] - 181[label=<181
0.012508171868127804>]; - 183 -- 184 [side=R] - 184[label=<184
0.4823271793225655>]; - 184 -- 185 [side=R] - 185[label=<185
0.3841022310929997>]; - 185 -- 186 [side=R] - 186[label=<186
0.18775042542247533>]; - 187 -- 203 [side=R] - 203[label=<203
0.8944186382789656>]; - 203 -- 194 [side=L] - 194[label=<194
0.878541152511799>]; - 194 -- 190 [side=L] - 190[label=<190
0.6762694909925744>]; - 190 -- 188 [side=L] - 188[label=<188
0.3829541869540283>]; - 188 -- 189 [side=R] - 189[label=<189
0.20943675668859407>]; - 190 -- 192 [side=R] - 192[label=<192
0.5817469625147841>]; - 192 -- 191 [side=L] - 191[label=<191
0.09503744303386963>]; - 192 -- 193 [side=R] - 193[label=<193
0.3644771285875493>]; - 194 -- 197 [side=R] - 197[label=<197
0.7948407869124952>]; - 197 -- 195 [side=L] - 195[label=<195
0.7942811285510138>]; - 195 -- 196 [side=R] - 196[label=<196
0.5627582929517205>]; - 197 -- 202 [side=R] - 202[label=<202
0.6971384514391066>]; - 202 -- 198 [side=L] - 198[label=<198
0.6927264474372884>]; - 198 -- 200 [side=R] - 200[label=<200
0.6455091049085492>]; - 200 -- 199 [side=L] - 199[label=<199
0.5952913724552451>]; - 200 -- 201 [side=R] - 201[label=<201
0.02134082793304981>]; - 203 -- 222 [side=R] - 222[label=<222
0.7905661844403327>]; - 222 -- 221 [side=L] - 221[label=<221
0.7344461814842598>]; - 221 -- 218 [side=L] - 218[label=<218
0.710158738001777>]; - 218 -- 214 [side=L] - 214[label=<214
0.6681641416928006>]; - 214 -- 210 [side=L] - 210[label=<210
0.6445674840025054>]; - 210 -- 204 [side=L] - 204[label=<204
0.4613335241960753>]; - 204 -- 207 [side=R] - 207[label=<207
0.4534069998012402>]; - 207 -- 205 [side=L] - 205[label=<205
0.3958801765835097>]; - 205 -- 206 [side=R] - 206[label=<206
0.11317604266906478>]; - 207 -- 208 [side=R] - 208[label=<208
0.15215468537761012>]; - 208 -- 209 [side=R] - 209[label=<209
0.1517155863611125>]; - 210 -- 213 [side=R] - 213[label=<213
0.5254528847170963>]; - 213 -- 212 [side=L] - 212[label=<212
0.3831362875816451>]; - 212 -- 211 [side=L] - 211[label=<211
0.05056020282973139>]; - 214 -- 215 [side=R] - 215[label=<215
0.5009367015686823>]; - 215 -- 216 [side=R] - 216[label=<216
0.10901874828554337>]; - 216 -- 217 [side=R] - 217[label=<217
0.05646524838685241>]; - 218 -- 220 [side=R] - 220[label=<220
0.674411240175005>]; - 220 -- 219 [side=L] - 219[label=<219
0.14299197003272757>]; - 222 -- 223 [side=R] - 223[label=<223
0.6736901076540927>]; - 223 -- 226 [side=R] - 226[label=<226
0.6137254504515176>]; - 226 -- 225 [side=L] - 225[label=<225
0.42057192814517086>]; - 225 -- 224 [side=L] - 224[label=<224
0.15459807255670532>]; - 226 -- 227 [side=R] - 227[label=<227
0.3240860179195383>]; - 227 -- 229 [side=R] - 229[label=<229
0.1917962317572811>]; - 229 -- 228 [side=L] - 228[label=<228
0.09544487292662296>]; - 230 -- 231 [side=R] - 231[label=<231
0.9537855788342208>]; - 231 -- 234 [side=R] - 234[label=<234
0.8201857812416328>]; - 234 -- 233 [side=L] - 233[label=<233
0.38950247492509926>]; - 233 -- 232 [side=L] - 232[label=<232
0.3117950672088875>]; - 235 -- 245 [side=R] - 245[label=<245
0.9606326065278836>]; - 245 -- 243 [side=L] - 243[label=<243
0.9010055124673463>]; - 243 -- 237 [side=L] - 237[label=<237
0.8906258722253291>]; - 237 -- 236 [side=L] - 236[label=<236
0.699210568923407>]; - 237 -- 239 [side=R] - 239[label=<239
0.8460173468831691>]; - 239 -- 238 [side=L] - 238[label=<238
0.11026801706799916>]; - 239 -- 242 [side=R] - 242[label=<242
0.8439586286718704>]; - 242 -- 240 [side=L] - 240[label=<240
0.6602788030770246>]; - 240 -- 241 [side=R] - 241[label=<241
0.5119345141518413>]; - 243 -- 244 [side=R] - 244[label=<244
0.7247536554274525>]; - 245 -- 246 [side=R] - 246[label=<246
0.5721754588545217>]; - 246 -- 247 [side=R] - 247[label=<247
0.5181752052426042>]; - 247 -- 249 [side=R] - 249[label=<249
0.07619775425067954>]; - 249 -- 248 [side=L] - 248[label=<248
0.03123446276525388>]; -} -graph { - node [shape=circle] - 2[label=<2
0.9957869495270144>]; - 2 -- 1 [side=L] - 1[label=<1
0.8476937427586738>]; - 1 -- 0 [side=L] - 0[label=<0
0.7514570833207395>]; - 2 -- 145 [side=R] - 145[label=<145
0.9939888728473314>]; - 145 -- 12 [side=L] - 12[label=<12
0.9907319285760915>]; - 12 -- 5 [side=L] - 5[label=<5
0.988082370881665>]; - 5 -- 4 [side=L] - 4[label=<4
0.7422137887388395>]; - 4 -- 3 [side=L] - 3[label=<3
0.24440218394323976>]; - 5 -- 8 [side=R] - 8[label=<8
0.695547503791194>]; - 8 -- 6 [side=L] - 6[label=<6
0.5664414792316717>]; - 6 -- 7 [side=R] - 7[label=<7
0.11672897928969816>]; - 8 -- 11 [side=R] - 11[label=<11
0.45404068759973704>]; - 11 -- 9 [side=L] - 9[label=<9
0.3325186970255505>]; - 9 -- 10 [side=R] - 10[label=<10
0.10578109016044712>]; - 12 -- 47 [side=R] - 47[label=<47
0.9904839559513964>]; - 47 -- 33 [side=L] - 33[label=<33
0.9758188259688814>]; - 33 -- 28 [side=L] - 28[label=<28
0.915671870648212>]; - 28 -- 23 [side=L] - 23[label=<23
0.8487360320264957>]; - 23 -- 20 [side=L] - 20[label=<20
0.7063809958484802>]; - 20 -- 16 [side=L] - 16[label=<16
0.7034299359160114>]; - 16 -- 15 [side=L] - 15[label=<15
0.5922634040068163>]; - 15 -- 13 [side=L] - 13[label=<13
0.4543498778672278>]; - 13 -- 14 [side=R] - 14[label=<14
0.21605966187340298>]; - 16 -- 17 [side=R] - 17[label=<17
0.3709284358038115>]; - 17 -- 18 [side=R] - 18[label=<18
0.260599350868457>]; - 18 -- 19 [side=R] - 19[label=<19
0.1817886048339078>]; - 20 -- 21 [side=R] - 21[label=<21
0.14210053829804103>]; - 21 -- 22 [side=R] - 22[label=<22
0.08869917426783513>]; - 23 -- 25 [side=R] - 25[label=<25
0.4892716208875628>]; - 25 -- 24 [side=L] - 24[label=<24
0.25363061372658946>]; - 25 -- 26 [side=R] - 26[label=<26
0.13796660931463622>]; - 26 -- 27 [side=R] - 27[label=<27
0.12997273552181765>]; - 28 -- 31 [side=R] - 31[label=<31
0.8601319422972336>]; - 31 -- 29 [side=L] - 29[label=<29
0.8084698172467167>]; - 29 -- 30 [side=R] - 30[label=<30
0.27908621178937176>]; - 31 -- 32 [side=R] - 32[label=<32
0.0844421977476495>]; - 33 -- 35 [side=R] - 35[label=<35
0.967049096465691>]; - 35 -- 34 [side=L] - 34[label=<34
0.12377873295631248>]; - 35 -- 39 [side=R] - 39[label=<39
0.9328169604588316>]; - 39 -- 38 [side=L] - 38[label=<38
0.7787222122657034>]; - 38 -- 37 [side=L] - 37[label=<37
0.4043690842717128>]; - 37 -- 36 [side=L] - 36[label=<36
0.3046740248996985>]; - 39 -- 43 [side=R] - 43[label=<43
0.8350649185763811>]; - 43 -- 40 [side=L] - 40[label=<40
0.5406537578359348>]; - 40 -- 42 [side=R] - 42[label=<42
0.3193437618727414>]; - 42 -- 41 [side=L] - 41[label=<41
0.27788504363715394>]; - 43 -- 44 [side=R] - 44[label=<44
0.4830145820991081>]; - 44 -- 46 [side=R] - 46[label=<46
0.33052118143151343>]; - 46 -- 45 [side=L] - 45[label=<45
0.29551968300055065>]; - 47 -- 114 [side=R] - 114[label=<114
0.9876511078116038>]; - 114 -- 86 [side=L] - 86[label=<86
0.9735379804960496>]; - 86 -- 77 [side=L] - 77[label=<77
0.9269342780771458>]; - 77 -- 68 [side=L] - 68[label=<68
0.9230582358699485>]; - 68 -- 55 [side=L] - 55[label=<55
0.839490204679298>]; - 55 -- 51 [side=L] - 51[label=<51
0.3385715557270519>]; - 51 -- 50 [side=L] - 50[label=<50
0.3360608605201836>]; - 50 -- 49 [side=L] - 49[label=<49
0.2774445142713422>]; - 49 -- 48 [side=L] - 48[label=<48
0.17489390663513982>]; - 51 -- 52 [side=R] - 52[label=<52
0.1886079446757799>]; - 52 -- 54 [side=R] - 54[label=<54
0.12293678964136756>]; - 54 -- 53 [side=L] - 53[label=<53
0.08266549169366844>]; - 55 -- 64 [side=R] - 64[label=<64
0.5480541758673416>]; - 64 -- 61 [side=L] - 61[label=<61
0.4333510895474586>]; - 61 -- 58 [side=L] - 58[label=<58
0.22965679673111572>]; - 58 -- 57 [side=L] - 57[label=<57
0.1750392554452569>]; - 57 -- 56 [side=L] - 56[label=<56
0.1154682181055644>]; - 58 -- 60 [side=R] - 60[label=<60
0.19505416228675265>]; - 60 -- 59 [side=L] - 59[label=<59
0.15628574332026035>]; - 61 -- 63 [side=R] - 63[label=<63
0.23625159275800722>]; - 63 -- 62 [side=L] - 62[label=<62
0.10778840213677576>]; - 64 -- 66 [side=R] - 66[label=<66
0.4795221664420316>]; - 66 -- 65 [side=L] - 65[label=<65
0.29689544767487486>]; - 66 -- 67 [side=R] - 67[label=<67
0.1446107813011439>]; - 68 -- 75 [side=R] - 75[label=<75
0.9136368186744673>]; - 75 -- 73 [side=L] - 73[label=<73
0.8180762618516633>]; - 73 -- 72 [side=L] - 72[label=<72
0.790052528538962>]; - 72 -- 69 [side=L] - 69[label=<69
0.6662534331501744>]; - 69 -- 70 [side=R] - 70[label=<70
0.198170103856931>]; - 70 -- 71 [side=R] - 71[label=<71
0.09798170568469355>]; - 73 -- 74 [side=R] - 74[label=<74
0.3113326781632004>]; - 75 -- 76 [side=R] - 76[label=<76
0.878956270284837>]; - 77 -- 84 [side=R] - 84[label=<84
0.7298827990732385>]; - 84 -- 81 [side=L] - 81[label=<81
0.7294243899593819>]; - 81 -- 78 [side=L] - 78[label=<78
0.6751780813023192>]; - 78 -- 80 [side=R] - 80[label=<80
0.2765792284019555>]; - 80 -- 79 [side=L] - 79[label=<79
0.12189576635143229>]; - 81 -- 83 [side=R] - 83[label=<83
0.5986915452921237>]; - 83 -- 82 [side=L] - 82[label=<82
0.5846092825823913>]; - 84 -- 85 [side=R] - 85[label=<85
0.5030186847252209>]; - 86 -- 103 [side=R] - 103[label=<103
0.968253245483458>]; - 103 -- 88 [side=L] - 88[label=<88
0.9609670084738324>]; - 88 -- 87 [side=L] - 87[label=<87
0.5446892558733331>]; - 88 -- 97 [side=R] - 97[label=<97
0.9483161815322799>]; - 97 -- 92 [side=L] - 92[label=<92
0.9417148997777522>]; - 92 -- 89 [side=L] - 89[label=<89
0.2455511319571373>]; - 89 -- 90 [side=R] - 90[label=<90
0.15647633602612276>]; - 90 -- 91 [side=R] - 91[label=<91
0.13997459061178652>]; - 92 -- 93 [side=R] - 93[label=<93
0.7804508194274744>]; - 93 -- 95 [side=R] - 95[label=<95
0.7226672256819942>]; - 95 -- 94 [side=L] - 94[label=<94
0.43503881905350683>]; - 95 -- 96 [side=R] - 96[label=<96
0.40712673636645136>]; - 97 -- 101 [side=R] - 101[label=<101
0.9255359833317219>]; - 101 -- 99 [side=L] - 99[label=<99
0.7139348283525223>]; - 99 -- 98 [side=L] - 98[label=<98
0.21279150783985867>]; - 99 -- 100 [side=R] - 100[label=<100
0.48002453571456083>]; - 101 -- 102 [side=R] - 102[label=<102
0.6219674393805691>]; - 103 -- 106 [side=R] - 106[label=<106
0.8731379458552129>]; - 106 -- 104 [side=L] - 104[label=<104
0.5400277589431796>]; - 104 -- 105 [side=R] - 105[label=<105
0.0007891566377928871>]; - 106 -- 110 [side=R] - 110[label=<110
0.8382929650437165>]; - 110 -- 109 [side=L] - 109[label=<109
0.4615149406437542>]; - 109 -- 107 [side=L] - 107[label=<107
0.43337904959817486>]; - 107 -- 108 [side=R] - 108[label=<108
0.053658620013432023>]; - 110 -- 112 [side=R] - 112[label=<112
0.5679810533328041>]; - 112 -- 111 [side=L] - 111[label=<111
0.541018763444613>]; - 112 -- 113 [side=R] - 113[label=<113
0.42841646768139463>]; - 114 -- 123 [side=R] - 123[label=<123
0.9464643826259924>]; - 123 -- 119 [side=L] - 119[label=<119
0.8662877311047388>]; - 119 -- 116 [side=L] - 116[label=<116
0.7670170824076343>]; - 116 -- 115 [side=L] - 115[label=<115
0.17190031141596263>]; - 116 -- 118 [side=R] - 118[label=<118
0.3671674221014496>]; - 118 -- 117 [side=L] - 117[label=<117
0.2159590427591035>]; - 119 -- 121 [side=R] - 121[label=<121
0.74738638353363>]; - 121 -- 120 [side=L] - 120[label=<120
0.08665916013292596>]; - 121 -- 122 [side=R] - 122[label=<122
0.4119543486009871>]; - 123 -- 143 [side=R] - 143[label=<143
0.9247442715890171>]; - 143 -- 140 [side=L] - 140[label=<140
0.8936337545064225>]; - 140 -- 125 [side=L] - 125[label=<125
0.8517904638602968>]; - 125 -- 124 [side=L] - 124[label=<124
0.681815353261336>]; - 125 -- 126 [side=R] - 126[label=<126
0.7946927108945189>]; - 126 -- 133 [side=R] - 133[label=<133
0.7289879153749418>]; - 133 -- 128 [side=L] - 128[label=<128
0.7256302169867033>]; - 128 -- 127 [side=L] - 127[label=<127
0.6200343851508556>]; - 128 -- 131 [side=R] - 131[label=<131
0.7089639682237622>]; - 131 -- 129 [side=L] - 129[label=<129
0.5255088944172916>]; - 129 -- 130 [side=R] - 130[label=<130
0.41550021924797953>]; - 131 -- 132 [side=R] - 132[label=<132
0.1851371011016204>]; - 133 -- 134 [side=R] - 134[label=<134
0.6333388625482715>]; - 134 -- 135 [side=R] - 135[label=<135
0.4707182839428111>]; - 135 -- 138 [side=R] - 138[label=<138
0.4128350639522185>]; - 138 -- 137 [side=L] - 137[label=<137
0.25475301393491523>]; - 137 -- 136 [side=L] - 136[label=<136
0.04841095612377411>]; - 138 -- 139 [side=R] - 139[label=<139
0.3855980374729622>]; - 140 -- 141 [side=R] - 141[label=<141
0.8655582249610275>]; - 141 -- 142 [side=R] - 142[label=<142
0.2288591915235536>]; - 143 -- 144 [side=R] - 144[label=<144
0.36710686055015185>]; - 145 -- 235 [side=R] - 235[label=<235
0.9812914040931304>]; - 235 -- 169 [side=L] - 169[label=<169
0.9782585107854103>]; - 169 -- 148 [side=L] - 148[label=<148
0.9449023406787344>]; - 148 -- 147 [side=L] - 147[label=<147
0.7293740098286456>]; - 147 -- 146 [side=L] - 146[label=<146
0.509027051210447>]; - 148 -- 150 [side=R] - 150[label=<150
0.9020436951014624>]; - 150 -- 149 [side=L] - 149[label=<149
0.8190200516813236>]; - 150 -- 153 [side=R] - 153[label=<153
0.8855777631108436>]; - 153 -- 152 [side=L] - 152[label=<152
0.8603376027563862>]; - 152 -- 151 [side=L] - 151[label=<151
0.45459695989529647>]; - 153 -- 167 [side=R] - 167[label=<167
0.8492899523613442>]; - 167 -- 163 [side=L] - 163[label=<163
0.8177575678069656>]; - 163 -- 160 [side=L] - 160[label=<160
0.7759852430212377>]; - 160 -- 158 [side=L] - 158[label=<158
0.734924251943539>]; - 158 -- 156 [side=L] - 156[label=<156
0.710110660084087>]; - 156 -- 155 [side=L] - 155[label=<155
0.2959097323390557>]; - 155 -- 154 [side=L] - 154[label=<154
0.23060113716089548>]; - 156 -- 157 [side=R] - 157[label=<157
0.04398748527099361>]; - 158 -- 159 [side=R] - 159[label=<159
0.09047000528918914>]; - 160 -- 162 [side=R] - 162[label=<162
0.7216878041963218>]; - 162 -- 161 [side=L] - 161[label=<161
0.6871202472393689>]; - 163 -- 165 [side=R] - 165[label=<165
0.3886140063494328>]; - 165 -- 164 [side=L] - 164[label=<164
0.3870484684485387>]; - 165 -- 166 [side=R] - 166[label=<166
0.3476475192762013>]; - 167 -- 168 [side=R] - 168[label=<168
0.8169397159175148>]; - 169 -- 172 [side=R] - 172[label=<172
0.9747928133504129>]; - 172 -- 170 [side=L] - 170[label=<170
0.846452976104617>]; - 170 -- 171 [side=R] - 171[label=<171
0.3121690933042578>]; - 172 -- 230 [side=R] - 230[label=<230
0.958355558453681>]; - 230 -- 187 [side=L] - 187[label=<187
0.957557676245447>]; - 187 -- 180 [side=L] - 180[label=<180
0.92710438449752>]; - 180 -- 174 [side=L] - 174[label=<174
0.7049973940003331>]; - 174 -- 173 [side=L] - 173[label=<173
0.152023809588824>]; - 174 -- 179 [side=R] - 179[label=<179
0.6715227532708304>]; - 179 -- 176 [side=L] - 176[label=<176
0.6621862385446002>]; - 176 -- 175 [side=L] - 175[label=<175
0.1248086920689816>]; - 176 -- 178 [side=R] - 178[label=<178
0.5713418767827186>]; - 178 -- 177 [side=L] - 177[label=<177
0.5560171098718876>]; - 180 -- 183 [side=R] - 183[label=<183
0.9250644230760526>]; - 183 -- 182 [side=L] - 182[label=<182
0.6824354777265715>]; - 182 -- 181 [side=L] - 181[label=<181
0.012508171868127804>]; - 183 -- 184 [side=R] - 184[label=<184
0.4823271793225655>]; - 184 -- 185 [side=R] - 185[label=<185
0.3841022310929997>]; - 185 -- 186 [side=R] - 186[label=<186
0.18775042542247533>]; - 187 -- 203 [side=R] - 203[label=<203
0.8944186382789656>]; - 203 -- 194 [side=L] - 194[label=<194
0.878541152511799>]; - 194 -- 190 [side=L] - 190[label=<190
0.6762694909925744>]; - 190 -- 188 [side=L] - 188[label=<188
0.3829541869540283>]; - 188 -- 189 [side=R] - 189[label=<189
0.20943675668859407>]; - 190 -- 192 [side=R] - 192[label=<192
0.5817469625147841>]; - 192 -- 191 [side=L] - 191[label=<191
0.09503744303386963>]; - 192 -- 193 [side=R] - 193[label=<193
0.3644771285875493>]; - 194 -- 197 [side=R] - 197[label=<197
0.7948407869124952>]; - 197 -- 195 [side=L] - 195[label=<195
0.7942811285510138>]; - 195 -- 196 [side=R] - 196[label=<196
0.5627582929517205>]; - 197 -- 202 [side=R] - 202[label=<202
0.6971384514391066>]; - 202 -- 198 [side=L] - 198[label=<198
0.6927264474372884>]; - 198 -- 200 [side=R] - 200[label=<200
0.6455091049085492>]; - 200 -- 199 [side=L] - 199[label=<199
0.5952913724552451>]; - 200 -- 201 [side=R] - 201[label=<201
0.02134082793304981>]; - 203 -- 222 [side=R] - 222[label=<222
0.7905661844403327>]; - 222 -- 221 [side=L] - 221[label=<221
0.7344461814842598>]; - 221 -- 218 [side=L] - 218[label=<218
0.710158738001777>]; - 218 -- 214 [side=L] - 214[label=<214
0.6681641416928006>]; - 214 -- 210 [side=L] - 210[label=<210
0.6445674840025054>]; - 210 -- 204 [side=L] - 204[label=<204
0.4613335241960753>]; - 204 -- 207 [side=R] - 207[label=<207
0.4534069998012402>]; - 207 -- 205 [side=L] - 205[label=<205
0.3958801765835097>]; - 205 -- 206 [side=R] - 206[label=<206
0.11317604266906478>]; - 207 -- 208 [side=R] - 208[label=<208
0.15215468537761012>]; - 208 -- 209 [side=R] - 209[label=<209
0.1517155863611125>]; - 210 -- 213 [side=R] - 213[label=<213
0.5254528847170963>]; - 213 -- 212 [side=L] - 212[label=<212
0.3831362875816451>]; - 212 -- 211 [side=L] - 211[label=<211
0.05056020282973139>]; - 214 -- 215 [side=R] - 215[label=<215
0.5009367015686823>]; - 215 -- 216 [side=R] - 216[label=<216
0.10901874828554337>]; - 216 -- 217 [side=R] - 217[label=<217
0.05646524838685241>]; - 218 -- 220 [side=R] - 220[label=<220
0.674411240175005>]; - 220 -- 219 [side=L] - 219[label=<219
0.14299197003272757>]; - 222 -- 223 [side=R] - 223[label=<223
0.6736901076540927>]; - 223 -- 226 [side=R] - 226[label=<226
0.6137254504515176>]; - 226 -- 225 [side=L] - 225[label=<225
0.42057192814517086>]; - 225 -- 224 [side=L] - 224[label=<224
0.15459807255670532>]; - 226 -- 227 [side=R] - 227[label=<227
0.3240860179195383>]; - 227 -- 229 [side=R] - 229[label=<229
0.1917962317572811>]; - 229 -- 228 [side=L] - 228[label=<228
0.09544487292662296>]; - 230 -- 231 [side=R] - 231[label=<231
0.9537855788342208>]; - 231 -- 234 [side=R] - 234[label=<234
0.8201857812416328>]; - 234 -- 233 [side=L] - 233[label=<233
0.38950247492509926>]; - 233 -- 232 [side=L] - 232[label=<232
0.3117950672088875>]; - 235 -- 245 [side=R] - 245[label=<245
0.9606326065278836>]; - 245 -- 243 [side=L] - 243[label=<243
0.9010055124673463>]; - 243 -- 237 [side=L] - 237[label=<237
0.8906258722253291>]; - 237 -- 236 [side=L] - 236[label=<236
0.699210568923407>]; - 237 -- 239 [side=R] - 239[label=<239
0.8460173468831691>]; - 239 -- 238 [side=L] - 238[label=<238
0.11026801706799916>]; - 239 -- 242 [side=R] - 242[label=<242
0.8439586286718704>]; - 242 -- 240 [side=L] - 240[label=<240
0.6602788030770246>]; - 240 -- 241 [side=R] - 241[label=<241
0.5119345141518413>]; - 243 -- 244 [side=R] - 244[label=<244
0.7247536554274525>]; - 245 -- 246 [side=R] - 246[label=<246
0.5721754588545217>]; - 246 -- 247 [side=R] - 247[label=<247
0.5181752052426042>]; - 247 -- 249 [side=R] - 249[label=<249
0.07619775425067954>]; - 249 -- 248 [side=L] - 248[label=<248
0.03123446276525388>]; -} diff --git a/content/posts/2024-07-27-treap-revisited/index.md b/content/posts/2024-07-27-treap-revisited/index.md deleted file mode 100644 index 99ae68f..0000000 --- a/content/posts/2024-07-27-treap-revisited/index.md +++ /dev/null @@ -1,146 +0,0 @@ ---- -title: "Treap, revisited" -date: 2024-07-27T14:12:27+01:00 -draft: false # I don't care for draft mode, git has branches for that -description: "An even simpler BST" -tags: - - algorithms - - data structures - - python -categories: - - programming -series: - - Cool algorithms -favorite: false -disable_feed: false ---- - -My [last post]({{< relref "../2024-07-20-treap/index.md" >}}) about the _Treap_ -showed an implementation using tree rotations, as is commonly done with [AVL -Trees][avl] and [Red Black Trees][rb]. - -But the _Treap_ lends itself well to a simple and elegant implementation with no -tree rotations. This makes it especially easy to implement the removal of a key, -rather than the fiddly process of deletion using tree rotations. - -[avl]: https://en.wikipedia.org/wiki/AVL_tree -[rb]: https://en.wikipedia.org/wiki/Red%E2%80%93black_tree - - - -## Implementation - -All operations on the tree will be implemented in terms of two fundamental -operations: `split` and `merge`. - -We'll be reusing the same structures as in the last post, so let's skip straight -to implementing those fundaments, and building on them for `insert` and -`delete`. - -### Split - -Splitting a tree means taking a key, and getting the following output: - -* a `left` node, root of the tree of all keys lower than the input. -* an extracted `node` which corresponds to the input `key`. -* a `right` node, root of the tree of all keys higher than the input. 
- -```python -type OptionalNode[K, V] = Node[K, V] | None - -class SplitResult(NamedTuple): - left: OptionalNode - node: OptionalNode - right: OptionalNode - -def split(root: OptionalNode[K, V], key: K) -> SplitResult: - # Base case, empty tree - if root is None: - return SplitResult(None, None, None) - # If we found the key, simply extract left and right - if root.key == key: - left, right = root.left, root.right - root.left, root.right = None, None - return SplitResult(left, root, right) - # Otherwise, recurse on the corresponding side of the tree - if root.key < key: - left, node, right = split(root.right, key) - root.right = left - return SplitResult(root, node, right) - if key < root.key: - left, node, right = split(root.left, key) - root.left = right - return SplitResult(left, node, root) - raise RuntimeError("Unreachable") -``` - -### Merge - -Merging a `left` and `right` tree means (cheaply) building a new tree containing -both of them. A pre-condition for merging is that the `left` tree is composed -entirely of nodes that are lower than any key in `right` (i.e: as in `left` and -`right` after a `split`). - -```python -def merge( - left: OptionalNode[K, V], - right: OptionalNode[K, V], -) -> OptionalNode[K, V]: - # Base cases, left or right being empty - if left is None: - return right - if right is None: - return left - # Left has higher priority, it must become the root node - if left.priority >= right.priority: - # We recursively reconstruct its right sub-tree - left.right = merge(left.right, right) - return left - # Right has higher priority, it must become the root node - if left.priority < right.priority: - # We recursively reconstruct its left sub-tree - right.left = merge(left, right.left) - return right - raise RuntimeError("Unreachable") -``` - -### Insertion - -Inserting a node into the tree is done in two steps: - -1. `split` the tree to isolate the middle insertion point -2. 
`merge` it back up to form a full tree with the inserted key - -```python -def insert(self, key: K, value: V) -> bool: - # `left` and `right` come before/after the key - left, node, right = split(self._root, key) - was_updated: bool - # Create the node, or update its value, if the key was already in the tree - if node is None: - node = Node(key, value) - was_updated = False - else: - node.value = value - was_updated = True - # Rebuild the tree with a couple of merge operations - self._root = merge(left, merge(node, right)) - # Signal whether the key was already in the key - return was_updated -``` - -### Removal - -Removing a key from the tree is similar to inserting a new key, and forgetting -to insert it back: simply `split` the tree and `merge` it back without the -extracted middle node. - -```python -def remove(self, key: K) -> bool: - # `node` contains the key, or `None` if the key wasn't in the tree - left, node, right = split(self._root, key) - # Put the tree back together, without the extract node - self._root = merge(left, right) - # Signal whether `key` was mapped in the tree - return node is not None -``` diff --git a/content/posts/2024-08-02-reservoir-sampling/index.md b/content/posts/2024-08-02-reservoir-sampling/index.md deleted file mode 100644 index 270c02c..0000000 --- a/content/posts/2024-08-02-reservoir-sampling/index.md +++ /dev/null @@ -1,145 +0,0 @@ ---- -title: "Reservoir Sampling" -date: 2024-08-02T18:30:56+01:00 -draft: false # I don't care for draft mode, git has branches for that -description: "Elegantly sampling a stream" -tags: - - algorithms - - python -categories: - - programming -series: - - Cool algorithms -favorite: false -disable_feed: false -mathjax: true ---- - -[_Reservoir Sampling_][reservoir] is an [online][online], probabilistic -algorithm to uniformly sample $k$ random elements out of a stream of values. 
- -It's a particularly elegant and small algorithm, only requiring $\Theta(k)$ -amount of space and a single pass through the stream. - -[reservoir]: https://en.wikipedia.org/wiki/Reservoir_sampling -[online]: https://en.wikipedia.org/wiki/Online_algorithm - - - -## Sampling one element - -As an introduction, we'll first focus on fairly sampling one element from the -stream. - -```python -def sample_one[T](stream: Iterable[T]) -> T: - stream_iter = iter(stream) - # Sample the first element - res = next(stream_iter) - for i, val in enumerate(stream_iter, start=1): - j = random.randint(0, i) - # Replace the sampled element with probability 1/(i + 1) - if j == 0: - res = val - # Return the randomly sampled element - return res -``` - -### Proof - -Let's now prove that this algorithm leads to a fair sampling of the stream. - -We'll be doing proof by induction. - -#### Hypothesis $H_N$ - -After iterating through the first $N$ items in the stream, -each of them has had an equal $\frac{1}{N}$ probability of being selected as -`res`. - -#### Base Case $H_1$ - -We can trivially observe that the first element is always assigned to `res`, -$\frac{1}{1} = 1$, the hypothesis has been verified. - -#### Inductive Case - -For a given $N$, let us assume that $H_N$ holds. Let us now look at the events -of loop iteration where `i = N` (i.e: observation of the $N + 1$-th item in the -stream). - -`j = random.randint(0, i)` uniformly selects a value in the range $[0, i]$, -a.k.a $[0, N]$. We then have two cases: - -* `j == 0`, with probability $\frac{1}{N + 1}$: we select `val` as the new -reservoir element `res`. - -* `j != 0`, with probability $\frac{N}{N + 1}$: we keep the previous value of -`res`. By $H_N$, any of the first $N$ elements had a $\frac{1}{N}$ probability -of being `res` before at the start of the loop, each element now has a -probability $\frac{1}{N} \cdot \frac{N}{N + 1} = \frac{1}{N + 1}$ of being the -element. 
- -And thus, we have proven $H_{N + 1}$ at the end of the loop. - -## Sampling $k$ elements - -The code for sampling $k$ elements is very similar to the one-element case. - -```python -def sample[T](stream: Iterable[T], k: int = 1) -> list[T]: - stream_iter = iter(stream) - # Retain the first 'k' elements in the reservoir - res = list(itertools.islice(stream_iter, k)) - for i, val in enumerate(stream_iter, start=k): - j = random.randint(0, i) - # Replace one element at random with probability k/(i + 1) - if j < k: - res[j] = val - # Return 'k' randomly sampled elements - return res -``` - -### Proof - -Let us once again do a proof by induction, assuming the stream contains at least -$k$ items. - -#### Hypothesis $H_N$ - -After iterating through the first $N$ items in the stream, each of them has had -an equal $\frac{k}{N}$ probability of being sampled from the stream. - -#### Base Case $H_k$ - -We can trivially observe that the first $k$ elements are sampled at the start of -the algorithm, $\frac{k}{k} = 1$, the hypothesis has been verified. - -#### Inductive Case - -For a given $N$, let us assume that $H_N$ holds. Let us now look at the events -of the loop iteration where `i = N`, in order to prove $H_{N + 1}$. - -`j = random.randint(0, i)` uniformly selects a value in the range $[0, i]$, -a.k.a $[0, N]$. We then have three cases: - -* `j >= k`, with probability $1 - \frac{k}{N + 1}$: we do not modify the -sampled reservoir at all. - -* `j < k`, with probability $\frac{k}{N + 1}$: we sample the new element to -replace the `j`-th element of the reservoir. Therefore for any element -$e \in [0, k[$ we can either have: - * $j = e$: the element _is_ replaced, probability $\frac{1}{k}$. - * $j \neq e$: the element is _not_ replaced, probability $\frac{k - 1}{k}$. - -We can now compute the probability that a previously sampled element is kept in -the reservoir: -$1 - \frac{k}{N + 1} + \frac{k}{N + 1} \cdot \frac{k - 1}{k} = \frac{N}{N + 1}$.
- -By $H_N$, any of the first $N$ elements had a $\frac{k}{N}$ probability -of being sampled at the start of the loop; each element now has a -probability $\frac{k}{N} \cdot \frac{N}{N + 1} = \frac{k}{N + 1}$ of being in the -reservoir. - -We have now proven that all elements have a probability $\frac{k}{N + 1}$ of -being sampled at the end of the loop, therefore $H_{N + 1}$ has been verified. diff --git a/content/posts/2024-08-10-kd-tree/index.md b/content/posts/2024-08-10-kd-tree/index.md deleted file mode 100644 index 2863201..0000000 --- a/content/posts/2024-08-10-kd-tree/index.md +++ /dev/null @@ -1,472 +0,0 @@ ---- -title: "k-d Tree" -date: 2024-08-10T11:50:33+01:00 -draft: false # I don't care for draft mode, git has branches for that -description: "Points in spaaaaace!" -tags: - - algorithms - - data structures - - python -categories: - - programming -series: - - Cool algorithms -favorite: false -disable_feed: false ---- - -The [_k-d Tree_][wiki] is a useful way to map points in space and make them -efficient to query. - -I ran into them during my studies in graphics, as they are one of the -possible acceleration structures for [ray-casting] operations. - -[wiki]: https://en.wikipedia.org/wiki/K-d_tree -[ray-casting]: https://en.wikipedia.org/wiki/Ray_casting - - - -## Implementation - -As usual, this will be in Python, though its lack of proper discriminated enums -makes it more verbose than would otherwise be necessary. - -### Pre-requisites - -Let's first define what kind of space our _k-d Tree_ is dealing with. In this -instance $k = 3$ just like in the normal world. - -```python -class Point(NamedTuple): - x: float - y: float - z: float - -class Axis(IntEnum): - X = 0 - Y = 1 - Z = 2 - - def next(self) -> Axis: - # Each level of the tree is split along a different axis - return Axis((self + 1) % 3) -``` - -### Representation - -The tree is represented by `KdTree`, each of its leaf nodes is a `KdLeafNode` -and its inner nodes are `KdSplitNode`s.
- -For each point in space, the tree can also keep track of an associated value, -similar to a dictionary or other mapping data structure. Hence we will make our -`KdTree` generic to this mapped type `T`. - -#### Leaf node - -A leaf node contains a number of points that were added to the tree. For each -point, we also track its mapped value, hence the `dict[Point, T]`. - -```python -class KdLeafNode[T]: - points: dict[Point, T] - - def __init__(self): - self.points = {} -``` - -#### Split node - -An inner node must partition the space into two sub-spaces along a given axis -and mid-point (thus defining a plane). All points that are "to the left" of the -plane will be kept in one child, while all the points "to the right" will be in -the other. Similar to a [_Binary Search Tree_][bst]'s inner nodes. - -[bst]: https://en.wikipedia.org/wiki/Binary_search_tree - -```python -class KdSplitNode[T]: - axis: Axis - mid: float - children: tuple[KdTreeNode[T], KdTreeNode[T]] - - # Convenience function to index into the child which contains `point` - def _index(self, point: Point) -> int: - return 0 if point[self.axis] <= self.mid else 1 -``` - -#### Tree - -The tree itself is merely a wrapper around its inner nodes. - -One annoying issue about writing this in Python is the lack of proper -discriminated enum types. So we need to create a wrapper type for the nodes -(`KdNode`) to allow for splitting when updating the tree.
- -```python -class KdNode[T]: - # Wrapper around leaf/inner nodes, the poor man's discriminated enum - inner: KdLeafNode[T] | KdSplitNode[T] - - def __init__(self): - self.inner = KdLeafNode() - - # Convenience constructor used when splitting a node - @classmethod - def from_items(cls, items: Iterable[tuple[Point, T]]) -> KdNode[T]: - res = cls() - res.inner.points.update(items) - return res - -class KdTree[T]: - _root: KdNode[T] - - def __init__(self): - # Tree starts out empty - self._root = KdNode() -``` - -### Inserting a point - -To add a point to the tree, we simply recurse from node to node, similar to a -_BST_'s insertion algorithm. Once we've found the correct leaf node to insert -our point into, we simply do so. - -If that leaf node goes over the maximum number of points it can store, we must -then split it along an axis, cycling between `X`, `Y`, and `Z` at each level of -the tree (i.e: splitting along the `X` axis on the first level, then `Y` on the -second, then `Z` after that, and then `X`, etc...). 
- -```python -# How many points should be stored in a leaf node before being split -MAX_CAPACITY = 32 - -def median(values: Iterable[float]) -> float: - sorted_values = sorted(values) - mid_point = len(sorted_values) // 2 - if len(sorted_values) % 2 == 1: - return sorted_values[mid_point] - a, b = sorted_values[mid_point], sorted_values[mid_point + 1] - return a + (b - a) / 2 - -def partition[T]( - pred: Callable[[T], bool], - iterable: Iterable[T] -) -> tuple[list[T], list[T]]: - truths, falses = [], [] - for v in iterable: - (truths if pred(v) else falses).append(v) - return truths, falses - -def split_leaf[T](node: KdLeafNode[T], axis: Axis) -> KdSplitNode[T]: - # Find the median value for the given axis - mid = median(p[axis] for p in node.points) - # Split into left/right children according to the mid-point and axis - left, right = partition(lambda kv: kv[0][axis] <= mid, node.points.items()) - return KdSplitNode( - split_axis, - mid, - (KdNode.from_items(left), KdNode.from_items(right)), - ) - -class KdTree[T]: - def insert(self, point: Point, val: T) -> bool: - # Forward to the root node, choose `X` as the first split axis - return self._root.insert(point, val, Axis.X) - -class KdLeafNode[T]: - def insert(self, point: Point, val: T, split_axis: Axis) -> bool: - # Check whether we're overwriting a previous value - was_mapped = point in self.points - # Store the corresponding value - self.points[point] = val - # Return whether we've performed an overwrite - return was_mapped - -class KdSplitNode[T]: - def insert(self, point: Point, val: T, split_axis: Axis) -> bool: - # Find the child which contains the point - child = self.children[self._index(point)] - # Recurse into it, choosing the next split axis - return child.insert(point, val, split_axis.next()) - -class KdNode[T]: - def insert(self, point: Point, val: T, split_axis: Axis) -> bool: - # Add the point to the wrapped node... - res = self.inner.insert(point, val, split_axis) - # ... 
And take care of splitting leaf nodes when necessary - if ( - isinstance(self.inner, KdLeafNode) - and len(self.inner.points) > MAX_CAPACITY - ): - self.inner = split_leaf(self.inner, split_axis) - return res -``` - -### Searching for a point - -Looking for a given point in the tree looks very similar to a _BST_'s search, -each split node dividing the space into two sub-spaces, only one of which -contains the point. - -```python -class KdTree[T]: - def lookup(self, point: Point) -> T | None: - # Forward to the root node - return self._root.lookup(point) - -class KdNode[T]: - def lookup(self, point: Point) -> T | None: - # Forward to the wrapped node - return self.inner.lookup(point) - -class KdLeafNode[T]: - def lookup(self, point: Point) -> T | None: - # Simply check whether we've stored the point in this leaf - return self.points.get(point) - -class KdSplitNode[T]: - def lookup(self, point: Point) -> T | None: - # Recurse into the child which contains the point - return self.children[self._index(point)].lookup(point) -``` - -### Closest points - -Now to look at the most interesting operation one can do on a _k-d Tree_: -querying for the objects which are closest to a given point (i.e: the [Nearest -neighbour search][nns]). - -This is a more complicated algorithm, which will also need some modifications to -the current _k-d Tree_ implementation in order to track just a bit more information -about the points it contains. - -[nns]: https://en.wikipedia.org/wiki/Nearest_neighbor_search - -#### A notion of distance - -To search for the closest points to a given origin, we first need to define -which [distance](https://en.wikipedia.org/wiki/Distance) we are using in our -space. - -For this example, we'll simply be using the usual definition of [(Euclidean) -distance][euclidean-distance].
- -[euclidean-distance]: https://en.wikipedia.org/wiki/Euclidean_distance - -```python -def dist(point: Point, other: Point) -> float: - return sqrt(sum((a - b) ** 2 for a, b in zip(self, other))) -``` - -#### Tracking the tree's boundaries - -To make the query efficient, we'll need to track the tree's boundaries: the -bounding box of all points contained therein. This will allow us to stop the -search early once we've found enough points and can be sure that the rest of the -tree is too far away to qualify. - -For this, let's define the `AABB` (Axis-Aligned Bounding Box) class. - -```python -class Point(NamedTuple): - # Convenience function to replace the coordinate along a given dimension - def replace(self, axis: Axis, new_coord: float) -> Point: - coords = list(self) - coords[axis] = new_coord - return Point(coords) - -class AABB(NamedTuple): - # Lowest coordinates in the box - low: Point - # Highest coordinates in the box - high: Point - - # An empty box - @classmethod - def empty(cls) -> AABB: - return cls( - Point(*(float("inf"),) * 3), - Point(*(float("-inf"),) * 3), - ) - - # Split the box into two along a given axis for a given mid-point - def split(axis: Axis, mid: float) -> tuple[AABB, AABB]: - assert self.low[axis] <= mid <= self.high[axis] - return ( - AABB(self.low, self.high.replace(axis, mid)), - AABB(self.low.replace(axis, mid), self.high), - ) - - # Extend a box to contain a given point - def extend(self, point: Point) -> AABB: - low = NamedTuple(*(map(min, zip(self.low, point)))) - high = NamedTuple(*(map(max, zip(self.high, point)))) - return AABB(low, high) - - # Return the shortest between a given point and the box - def dist_to_point(self, point: Point) -> float: - deltas = ( - max(self.low[axis] - point[axis], 0, point[axis] - self.high[axis]) - for axis in Axis - ) - return dist(Point(0, 0, 0), Point(*deltas)) -``` - -And do the necessary modifications to the `KdTree` to store the bounding box and -update it as we add new points. 
- -```python -class KdTree[T]: - _root: KdNode[T] - # New field: to keep track of the tree's boundaries - _aabb: AABB - - def __init__(self): - self._root = KdNode() - # Initialize the empty tree with an empty bounding box - self._aabb = AABB.empty() - - def insert(self, point: Point, val: T) -> bool: - # Extend the AABB for our k-d Tree when adding a point to it - self._aabb = self._aabb.extend(point) - return self._root.insert(point, val, Axis.X) -``` - -#### `MaxHeap` - -Python's builtin [`heapq`][heapq] module provides the necessary functions to -create and interact with a [_Priority Queue_][priority-queue], in the form of a -[_Binary Heap_][binary-heap]. - -Unfortunately, Python's library maintains a _min-heap_, which keeps the minimum -element at the root. For this algorithm, we're interested in having a -_max-heap_, with the maximum at the root. - -Thankfully, one can just reverse the comparison function for each element to -convert between the two. Let's write a `MaxHeap` class making use of this -library, with a `Reverse` wrapper class to reverse the order of elements -contained within it (similar to [Rust's `Reverse`][reverse]). 
- -[binary-heap]: https://en.wikipedia.org/wiki/Binary_heap -[heapq]: https://docs.python.org/3/library/heapq.html -[priority-queue]: https://en.wikipedia.org/wiki/Priority_queue -[reverse]: https://doc.rust-lang.org/std/cmp/struct.Reverse.html - -```python -# Reverses the wrapped value's ordering -@functools.total_ordering -class Reverse[T]: - value: T - - def __init__(self, value: T): - self.value = value - - def __lt__(self, other: Reverse[T]) -> bool: - return self.value > other.value - - def __eq__(self, other: Reverse[T]) -> bool: - return self.value == other.value - -class MaxHeap[T]: - _heap: list[Reverse[T]] - - def __init__(self): - self._heap = [] - - def __len__(self) -> int: - return len(self._heap) - - def __iter__(self) -> Iterator[T]: - yield from (item.value for item in self._heap) - - # Push a value on the heap - def push(self, value: T) -> None: - heapq.heappush(self._heap, Reverse(value)) - - # Peek at the current maximum value - def peek(self) -> T: - return self._heap[0].value - - # Pop and return the highest value - def pop(self) -> T: - return heapq.heappop(self._heap).value - - # Pushes a value onto the heap, pops and returns the highest value - def pushpop(self, value: T) -> T: - return heapq.heappushpop(self._heap, Reverse(value)).value -``` - -#### The actual Implementation - -Now that we have written the necessary building blocks, let's tackle the -Implementation of `closest` for our _k-d Tree_. 
- -```python -# Wrapper type for closest points, ordered by `distance` -@dataclasses.dataclass(order=True) -class ClosestPoint[T](NamedTuple): - point: Point = field(compare=False) - value: T = field(compare=False) - distance: float - -class KdTree[T]: - def closest(self, point: Point, n: int = 1) -> list[ClosestPoint[T]]: - assert n > 0 - # Create the output heap - res = MaxHeap() - # Recurse onto the root node - self._root.closest(point, res, n, self._aabb) - # Return the resulting list, from closest to farthest - return sorted(res) - -class KdNode[T]: - def closest( - self, - point: Point, - out: MaxHeap[ClosestPoint[T]], - n: int, - bounds: AABB, - ) -> None: - # Forward to the wrapped node - self.inner.closest(point, out, n, bounds) - -class KdLeafNode[T]: - def closest( - self, - point: Point, - out: MaxHeap[ClosestPoint[T]], - n: int, - bounds: AABB, - ) -> None: - # At the leaf, simply iterate over all points and add them to the heap - for p, val in self.points.items(): - item = ClosestPoint(p, val, dist(p, point)) - if len(out) < n: - # If the heap isn't full, just push - out.push(item) - elif out.peek().distance > item.distance: - # Otherwise, push and pop to keep the heap at `n` elements - out.pushpop(item) - -class KdSplitNode[T]: - def closest( - self, - point: Point, - out: MaxHeap[ClosestPoint[T]], - n: int, - bounds: AABB, - ) -> None: - index = self._index(point) - children_bounds = bounds.split(self.axis, self.mid) - # Iterate over the child which contains the point, then its neighbour - for i in (index, 1 - index): - child, bounds = self.children[i], children_bounds[i] - # `min_dist` is 0 for the first child, and the minimum distance of - # all points contained in the second child - min_dist = bounds.dist_to_point(point) - # If the heap is at capacity and the child to inspect too far, stop - if len(out) == n and min_dist > out.peek().distance: - return - # Otherwise, recurse - child.closest(point, out, n, bounds) -``` diff --git 
a/content/posts/2024-08-17-kd-tree-revisited/index.md b/content/posts/2024-08-17-kd-tree-revisited/index.md deleted file mode 100644 index 58d2a6b..0000000 --- a/content/posts/2024-08-17-kd-tree-revisited/index.md +++ /dev/null @@ -1,112 +0,0 @@ ---- -title: "k-d Tree Revisited" -date: 2024-08-17T14:20:22+01:00 -draft: false # I don't care for draft mode, git has branches for that -description: "Simplifying the nearest neighbour search" -tags: - - algorithms - - data structures - - python -categories: - - programming -series: - - Cool algorithms -favorite: false -disable_feed: false ---- - -After giving it a bit of thought, I've found a way to simplify the nearest -neighbour search (i.e: the `closest` method) for the `KdTree` I implemented in -[my previous post]({{< relref "../2024-08-10-kd-tree/index.md" >}}). - - - -## The improvement - -That post implemented the nearest neighbour search by keeping track of the -tree's boundaries (through `AABB`), and each of its sub-trees (through -`AABB.split`), and testing for the early exit condition by computing the -distance of the search's origin to each sub-tree's boundaries. - -Instead of _explicitly_ keeping track of each sub-tree's boundaries, we can -implicitly compute it when recursing down the tree. - -To check for the distance between the queried point and the splitting plane of -inner nodes: we simply need to project the origin onto that plane, thus giving -us a minimal bound on the distance of the points stored on the other side. - -This can be easily computed from the `axis` and `mid` values which are stored in -the inner nodes: to project the node on the plane we simply replace its -coordinate for this axis by `mid`. 
- -## Simplified search - -With that out of the way, let's now see how `closest` can be implemented without -needing to track the tree's `AABB` at the root: - -```python -# Wrapper type for closest points, ordered by `distance` -@dataclasses.dataclass(order=True) -class ClosestPoint[T](NamedTuple): - point: Point = field(compare=False) - value: T = field(compare=False) - distance: float - -class KdTree[T]: - def closest(self, point: Point, n: int = 1) -> list[ClosestPoint[T]]: - assert n > 0 - res = MaxHeap() - # Instead of passing an `AABB`, we give an initial projection point, - # the query origin itself (since we haven't visited any split node yet) - self._root.closest(point, res, n, point) - return sorted(res) - -class KdNode[T]: - def closest( - self, - point: Point, - out: MaxHeap[ClosestPoint[T]], - n: int, - projection: Point, - ) -> None: - # Same implementation - self.inner.closest(point, out, n, bounds) - -class KdLeafNode[T]: - def closest( - self, - point: Point, - out: MaxHeap[ClosestPoint[T]], - n: int, - projection: Point, - ) -> None: - # Same implementation - for p, val in self.points.items(): - item = ClosestPoint(p, val, dist(p, point)) - if len(out) < n: - out.push(item) - elif out.peek().distance > item.distance: - out.pushpop(item) - -class KdSplitNode[T]: - def closest( - self, - point: Point, - out: MaxHeap[ClosestPoint[T]], - n: int, - projection: Point, - ) -> None: - index = self._index(point) - self.children[index].closest(point, out, n, projection) - # Project onto the splitting plane, for a minimum distance to its points - projection = projection.replace(self.axis, self.mid) - # If we're at capacity and can't possibly find any closer points, exit - if len(out) == n and dist(point, projection) > out.peek().distance: - return - # Otherwise recurse on the other side to check for nearer neighbours - self.children[1 - index].closest(point, out, n, projection) -``` - -As you can see, the main difference is in `KdSplitNode`'s implementation, 
where -we can quickly compute the minimum distance between the search's origin and all -potential points in that subspace. diff --git a/content/wish-lists.md b/content/wish-lists.md index e0a4548..0a65fa9 100644 --- a/content/wish-lists.md +++ b/content/wish-lists.md @@ -14,13 +14,3 @@ A few of my Amazon wish lists in case you want to give me a gift. * [Cooking & kitchenware](https://www.amazon.fr/hz/wishlist/ls/2MNRCLPNABZSU) * [Data Structures & Algorithms](https://www.amazon.fr/hz/wishlist/ls/2XZPQSBOGOFC3) * [Programming](https://www.amazon.fr/hz/wishlist/ls/1R4KFV4H2D8IF) -* Other items: - * [Chef's presses](https://www.thechefspress.com/shop) - * [UK link](https://www.kitchenprovisions.co.uk/products/the-chefs-press-8oz-13oz) - * [FR link](https://www.thekitchenlab.fr/fr/p/la-presse-du-chef-poids-de-friture-bruce-hill-13-oz/) - * [Combustion Inc thermometer and - display](https://combustion.inc/products/predictive-thermometer-gen2-wifi-display) - * [Get the one with the range extender if you *really* want to spoil - me](https://combustion.inc/products/predictive-thermometer-gen2-wifi-booster-wifi-display) - * [Cannelés - molds](https://www.laboetgato.fr/en/moules-a-canneles/13964-mould-for-canneles-non-polished-copper-o-45-mm-3333331010026.html) diff --git a/flake.lock b/flake.lock index 06446f6..f4850aa 100644 --- a/flake.lock +++ b/flake.lock @@ -1,68 +1,28 @@ { "nodes": { - "flake-compat": { - "flake": false, - "locked": { - "lastModified": 1696426674, - "narHash": "sha256-kvjfFW7WAETZlt09AgDn1MrtKzP7t90Vf7vypd3OL1U=", - "owner": "edolstra", - "repo": "flake-compat", - "rev": "0f9255e01c2351cc7d116c072cb317785dd33b33", - "type": "github" - }, - "original": { - "owner": "edolstra", - "repo": "flake-compat", - "type": "github" - } - }, "futils": { - "inputs": { - "systems": "systems" - }, "locked": { - "lastModified": 1710146030, - "narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=", + "lastModified": 1622445595, + "narHash": 
"sha256-m+JRe6Wc5OZ/mKw2bB3+Tl0ZbtyxxxfnAWln8Q5qs+Y=", "owner": "numtide", "repo": "flake-utils", - "rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a", + "rev": "7d706970d94bc5559077eb1a6600afddcd25a7c8", "type": "github" }, "original": { "owner": "numtide", - "ref": "main", + "ref": "master", "repo": "flake-utils", "type": "github" } }, - "gitignore": { - "inputs": { - "nixpkgs": [ - "pre-commit-hooks", - "nixpkgs" - ] - }, - "locked": { - "lastModified": 1709087332, - "narHash": "sha256-HG2cCnktfHsKV0s4XW83gU3F57gaTljL9KNSuG6bnQs=", - "owner": "hercules-ci", - "repo": "gitignore.nix", - "rev": "637db329424fd7e46cf4185293b9cc8c88c95394", - "type": "github" - }, - "original": { - "owner": "hercules-ci", - "repo": "gitignore.nix", - "type": "github" - } - }, "nixpkgs": { "locked": { - "lastModified": 1722415718, - "narHash": "sha256-5US0/pgxbMksF92k1+eOa8arJTJiPvsdZj9Dl+vJkM4=", + "lastModified": 1628320020, + "narHash": "sha256-4xBEb+TOHyIGpK37EVsZx6dGPwNMf5YWNBJaQ4VyZws=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "c3392ad349a5227f4a3464dce87bcc5046692fce", + "rev": "67c80531be622641b5b2ccc3a7aff355cb02476b", "type": "github" }, "original": { @@ -74,21 +34,19 @@ }, "pre-commit-hooks": { "inputs": { - "flake-compat": "flake-compat", - "gitignore": "gitignore", - "nixpkgs": [ - "nixpkgs" + "flake-utils": [ + "futils" ], - "nixpkgs-stable": [ + "nixpkgs": [ "nixpkgs" ] }, "locked": { - "lastModified": 1721042469, - "narHash": "sha256-6FPUl7HVtvRHCCBQne7Ylp4p+dpP3P/OYuzjztZ4s70=", + "lastModified": 1621411868, + "narHash": "sha256-R+7OQ2JYFCb3E7Jl7LhRifzMVCR6Gl8R98zYsNhZtJ8=", "owner": "cachix", "repo": "pre-commit-hooks.nix", - "rev": "f451c19376071a90d8c58ab1a953c6e9840527fd", + "rev": "2e7fac06108b4fc81f5ff9ed9a02bc4f6ede7001", "type": "github" }, "original": { @@ -104,21 +62,6 @@ "nixpkgs": "nixpkgs", "pre-commit-hooks": "pre-commit-hooks" } - }, - "systems": { - "locked": { - "lastModified": 1681028828, - "narHash": 
"sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", - "owner": "nix-systems", - "repo": "default", - "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", - "type": "github" - }, - "original": { - "owner": "nix-systems", - "repo": "default", - "type": "github" - } } }, "root": "root", diff --git a/flake.nix b/flake.nix index f3b8652..be1350a 100644 --- a/flake.nix +++ b/flake.nix @@ -6,7 +6,7 @@ type = "github"; owner = "numtide"; repo = "flake-utils"; - ref = "main"; + ref = "master"; }; nixpkgs = { @@ -22,8 +22,8 @@ repo = "pre-commit-hooks.nix"; ref = "master"; inputs = { + flake-utils.follows = "futils"; nixpkgs.follows = "nixpkgs"; - nixpkgs-stable.follows = "nixpkgs"; }; }; }; @@ -61,17 +61,15 @@ }; }; - devShells = { - default = pkgs.mkShell { - name = "blog"; + devShell = pkgs.mkShell { + name = "blog"; - buildInputs = with pkgs; [ - gnumake - hugo - ]; + buildInputs = with pkgs; [ + gnumake + hugo + ]; - inherit (self.checks.${system}.pre-commit) shellHook; - }; + inherit (self.checks.${system}.pre-commit) shellHook; }; } ); diff --git a/i18n/en.yaml b/i18n/en.yaml index 1f24308..cd75b25 100644 --- a/i18n/en.yaml +++ b/i18n/en.yaml @@ -1,5 +1,5 @@ -series: - other: "series" +serie: + other: "serie" Series: other: "Series" diff --git a/i18n/fr.yaml b/i18n/fr.yaml index 88dfcd3..22986f5 100644 --- a/i18n/fr.yaml +++ b/i18n/fr.yaml @@ -1,4 +1,4 @@ -series: +serie: other: "série" Series: diff --git a/layouts/partials/footer-extra.html b/layouts/partials/footer-extra.html index 23530b8..d6daaaf 100644 --- a/layouts/partials/footer-extra.html +++ b/layouts/partials/footer-extra.html @@ -11,7 +11,6 @@ Sourcehut LinkedIn Matrix - Mastodon PGP

diff --git a/layouts/partials/head-extra.html b/layouts/partials/head-extra.html index dc97efa..1e65c9f 100644 --- a/layouts/partials/head-extra.html +++ b/layouts/partials/head-extra.html @@ -3,30 +3,6 @@ {{ end }} - -{{ if (.Params.graphviz) }} - - -{{ end }} - -{{ if (.Params.mermaid) }} - -{{ end }} {{ with .OutputFormats.Get "atom" -}} {{ printf `` .Rel .MediaType.Type .Permalink $.Site.Title | safeHTML }} {{ end -}} diff --git a/layouts/shortcodes/graphviz.html b/layouts/shortcodes/graphviz.html deleted file mode 100644 index 45516a3..0000000 --- a/layouts/shortcodes/graphviz.html +++ /dev/null @@ -1,16 +0,0 @@ -

-    {{ with .Get "file" }}
-        {{ if eq (. | printf "%.1s") "/" }}
-            {{/* Absolute path are from root of site. */}}
-            {{ $.Scratch.Set "filepath" . }}
-        {{ else }}
-            {{/* Relative paths are from page directory. */}}
-            {{ $.Scratch.Set "filepath" $.Page.File.Dir }}
-            {{ $.Scratch.Add "filepath" . }}
-        {{ end }}
-
-        {{ $.Scratch.Get "filepath" | readFile }}
-    {{ else }}
-        {{.Inner}}
-    {{ end }}
-
diff --git a/layouts/shortcodes/mermaid.html b/layouts/shortcodes/mermaid.html deleted file mode 100644 index 80cf0a5..0000000 --- a/layouts/shortcodes/mermaid.html +++ /dev/null @@ -1,16 +0,0 @@ -
-    {{ with .Get "file" }}
-        {{ if eq (. | printf "%.1s") "/" }}
-            {{/* Absolute path are from root of site. */}}
-            {{ $.Scratch.Set "filepath" . }}
-        {{ else }}
-            {{/* Relative paths are from page directory. */}}
-            {{ $.Scratch.Set "filepath" $.Page.File.Dir }}
-            {{ $.Scratch.Add "filepath" . }}
-        {{ end }}
-
-        {{ $.Scratch.Get "filepath" | readFile }}
-    {{ else }}
-        {{.Inner}}
-    {{ end }}
-
diff --git a/layouts/shortcodes/tikz.html b/layouts/shortcodes/tikz.html index c298b09..a3a78f4 100644 --- a/layouts/shortcodes/tikz.html +++ b/layouts/shortcodes/tikz.html @@ -1,16 +1,3 @@ diff --git a/themes/anubis b/themes/anubis index d77e0d6..5dab60e 160000 --- a/themes/anubis +++ b/themes/anubis @@ -1 +1 @@ -Subproject commit d77e0d6d552ad135138f1f9443ef30cc5af8f0f3 +Subproject commit 5dab60e04a37896c09a32137aefe821c63b3af04 diff --git a/themes/hugo-atom-feed b/themes/hugo-atom-feed index d545eff..5da913d 160000 --- a/themes/hugo-atom-feed +++ b/themes/hugo-atom-feed @@ -1 +1 @@ -Subproject commit d545effed9949bf834eaed09ad423ec3e030794f +Subproject commit 5da913dc46d2dadcaf6548256238c58c504476de