-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.js
1065 lines (993 loc) · 43.9 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// # Literate Flatbush
// ## Understanding a fast, elegant RTree implementation
//
// [Kyle Barron](https://kylebarron.dev/) \
// January 8, 2025 \
// [Source code](https://github.com/kylebarron/literate-flatbush).
//
// [Spatial
// indexes](https://blog.mapbox.com/a-dive-into-spatial-search-algorithms-ebd0c5e39d2a)
// are at the core of geospatial software engineering. Given a spatial query
// ("What items are within this [bounding
// box](https://en.wikipedia.org/wiki/Minimum_bounding_box)" or "What are the
// closest items to this point"), they allow for weeding out the vast majority
// of data, making a search massively faster than naively checking all items.
//
// An [**RTree**](https://en.wikipedia.org/wiki/R-tree) is one of the most
// common types of spatial indexes. An RTree indexes [axis-aligned bounding
// boxes](https://en.wikipedia.org/wiki/Minimum_bounding_box#Axis-aligned_minimum_bounding_box),
// and so can flexibly manage a variety of [geospatial vector
// data](https://datacarpentry.github.io/organization-geospatial/02-intro-vector-data.html),
// like points, lines, and polygons (by recording the bounding box
// represented by the minimum and maximum extents of a geometry's coordinates).
//
// _But have you ever wondered how an RTree is actually implemented?_
//
// In this post we'll dive into the implementation of
// [Flatbush](https://github.com/mourner/flatbush), a blazing-fast,
// memory-efficient RTree written in JavaScript by [Volodymyr
// Agafonkin](https://agafonkin.com/). While this implementation is written in
// JavaScript, it's the _algorithm_ that's important here. Don't get too caught
// up in the JavaScript; it should be easy to follow no matter what language
// you're most familiar with.
//
// I [ported Flatbush to Rust](https://github.com/kylebarron/geo-index) with
// [Python bindings](https://kylebarron.dev/geo-index), and this post is the
// result of my efforts to better understand and document how the algorithm
// works.
//
// This post is a
// ["literate"](https://en.wikipedia.org/wiki/Literate_programming) fork of the
// upstream [Flatbush](https://github.com/mourner/flatbush) library.
// I've added comments to the code, and
// [docco](https://ashkenas.com/docco/) is used to generate the HTML file you're
// reading now. Documentation and code are interspersed, letting you follow
// along with the code. No code modifications have been made in this fork; only
// comments have been added. The source for this fork is
// [here](https://github.com/kylebarron/literate-flatbush).
//
// All credit for the code included here goes to Volodymyr Agafonkin and other
// contributors to the Flatbush project, forked here under the ISC license. Any
// errors in explanation are mine alone.
//
// ## Overview
//
// The Flatbush algorithm generates a **static, packed, ABI-stable RTree**.
// Let's break that down:
//
// - [**RTree**](https://en.wikipedia.org/wiki/R-tree): a spatial index for
// storing geospatial vector data that allows for fast spatial queries.
//
// It's a form of a
// ["tree"](https://en.wikipedia.org/wiki/Tree_(abstract_data_type)).
// There's one root node that has `nodeSize` children. Each of those nodes
// has its own `nodeSize` children, and so on. The tree structure allows
// you to avoid superfluous checks and quickly find matching candidates for
// your query. In particular, an RTree stores a _bounding box_ for each
// geometry.
//
// - **static**: the index is immutable. All geometries need to be added to the
// index before any searches can be done. Geometries can't be added to an
// existing index later.
//
// - **packed**: all nodes are at full capacity (except for the last node at
// each tree level). Because the tree is static, we don't need to reserve
// space in each node for future additions. This improves memory efficiency.
//
// - **ABI-stable**: the entire tree is stored in a single underlying memory
// buffer, with a well-defined, stable memory layout. This enables zero-copy
// sharing between threads ([Web
// Workers](https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API)
// in the browser) or, [as in my Rust
// port](https://github.com/kylebarron/geo-index), between two languages like
// Rust and Python.
//
// ## Why Flatbush?
//
// There are several nice features about Flatbush:
//
// - **Speed**: This is likely the fastest static spatial index in JavaScript. Ports of the algorithm are among the fastest spatial indexes in other languages, too.
// - **Single, contiguous underlying buffer**: The index is contained in a single `ArrayBuffer`, which makes it easy to share across multiple threads or persist and use later. In the process of building the index, there are only **two buffer allocations**: one for the main data buffer and a second intermediate one for the hilbert values.
// - **Memory-efficiency**: because the index is fully packed, it's highly memory efficient.
// - **Bounded-memory**: for any given number of items and node size, you can infer the total memory that will be used by the RTree.
// - **Elegant and concise**: Under 300 lines of JavaScript code and in my opinion it's quite elegant how the structure of the tree implicitly maintains the insertion index.
// - Used as the basis for other projects, like the [FlatGeobuf](https://flatgeobuf.org/) geospatial file format.
//
// What's not to like? Keep in mind there are a few restrictions:
//
// - Only two-dimensional data. Because the algorithm uses powers of two, only
// two-dimensional data is supported. It can be used with higher-dimensional
// input as long as you only index two of the dimensions.
// - The index is immutable. After creating the index, items can no longer be
// added or removed.
//
// ## Buffer layout
//
// All bounding box and index data is stored in a single, contiguous buffer,
// with three parts:
//
// - Header: an 8-byte header containing the coordinate array type, node size,
// and number of items.
// - Boxes: the bounding box data for each input geometry and intermediate tree
// nodes.
// - Indices: An ordering of boxes to allow for traversing the tree and
// retrieving the original insertion index.
//
// ## Diving into the code
import FlatQueue from "flatqueue";
// Flatbush supports a variety of
// [`TypedArray`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/TypedArray)
// types to store box coordinate data. Flatbush uses `Float64Array` by default.
// The position of each constructor in this list is part of the serialized
// format: the constructor writes `ARRAY_TYPES.indexOf(ArrayType)` into the low
// four bits of the second header byte, and `Flatbush.from` reads it back with
// `ARRAY_TYPES[versionAndType & 0x0f]`. Reordering or inserting entries would
// change how existing buffers are decoded.
const ARRAY_TYPES = [
  Int8Array, Uint8Array, Uint8ClampedArray, // 0-2: 8-bit integers
  Int16Array, Uint16Array, // 3-4: 16-bit integers
  Int32Array, Uint32Array, // 5-6: 32-bit integers
  Float32Array, Float64Array, // 7-8: floating point
];
// The Flatbush serialized format version. It is stored in the high four bits
// of the second header byte and is bumped whenever the binary layout of the
// index changes.
const VERSION = 3;
/** @typedef {Int8ArrayConstructor | Uint8ArrayConstructor | Uint8ClampedArrayConstructor | Int16ArrayConstructor | Uint16ArrayConstructor | Int32ArrayConstructor | Uint32ArrayConstructor | Float32ArrayConstructor | Float64ArrayConstructor} TypedArrayConstructor */
// ## Flatbush
//
// The `Flatbush` class is the only export from the Flatbush library. It
// contains functions to create and query the spatial index.
export default class Flatbush {
// ### Flatbush.from
//
// One of Flatbush's goals is to support zero-copy usage, meaning that you can
// take an `ArrayBuffer` backing a Flatbush index and
// [_transfer_](https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API/Transferable_objects)
// it between threads at virtually zero cost.
//
// The `from` static method on the class reconstructs a `Flatbush` instance
// from a raw `ArrayBuffer`.
/**
* Recreate a Flatbush index from raw `ArrayBuffer` or `SharedArrayBuffer` data.
* @param {ArrayBuffer | SharedArrayBuffer} data
* @returns {Flatbush} index
*/
static from(data) {
if (!data || data.byteLength === undefined || data.buffer) {
throw new Error(
"Data must be an instance of ArrayBuffer or SharedArrayBuffer."
);
}
// The first 8 bytes contain a header:
//
// - byte 1: a "magic byte" set to `0xfb`.
// - byte 2: four bits for the serialized format version and four bits for the array type used for storing coordinates
// - byte 3-4: a uint16-encoded number representing the size of each node
// - byte 5-8: a uint32-encoded number representing the total number of
// items in the index.
//
// We read each of these bytes from the provided data buffer, then pass the
// relevant parameters to the class constructor. Because the `data` argument
// (passed last) is not `undefined`, the constructor will not create a new
// underlying buffer, but rather reuse the existing buffer.
const [magic, versionAndType] = new Uint8Array(data, 0, 2);
if (magic !== 0xfb) {
throw new Error("Data does not appear to be in a Flatbush format.");
}
const version = versionAndType >> 4;
if (version !== VERSION) {
throw new Error(`Got v${version} data when expected v${VERSION}.`);
}
const ArrayType = ARRAY_TYPES[versionAndType & 0x0f];
if (!ArrayType) {
throw new Error("Unrecognized array type.");
}
const [nodeSize] = new Uint16Array(data, 2, 1);
const [numItems] = new Uint32Array(data, 4, 1);
return new Flatbush(numItems, nodeSize, ArrayType, undefined, data);
}
// ### Constructor
//
// The Flatbush constructor initializes the memory space (`ArrayBuffer`) for a
// Flatbush tree given the number of items the tree will contain and the
// number of elements per tree node.
/**
* Create a Flatbush index that will hold a given number of items.
* @param {number} numItems
* @param {number} [nodeSize=16] Size of the tree node (16 by default).
* @param {TypedArrayConstructor} [ArrayType=Float64Array] The array type used for coordinates storage (`Float64Array` by default).
* @param {ArrayBufferConstructor | SharedArrayBufferConstructor} [ArrayBufferType=ArrayBuffer] The array buffer type used to store data (`ArrayBuffer` by default).
* @param {ArrayBuffer | SharedArrayBuffer} [data] (Only used internally)
*/
constructor(
numItems,
nodeSize = 16,
ArrayType = Float64Array,
ArrayBufferType = ArrayBuffer,
data
) {
if (numItems === undefined)
throw new Error("Missing required argument: numItems.");
if (isNaN(numItems) || numItems <= 0)
throw new Error(`Unexpected numItems value: ${numItems}.`);
this.numItems = +numItems;
this.nodeSize = Math.min(Math.max(+nodeSize, 2), 65535);
// This do-while loop calculates the total number of nodes at each level of
// the R-tree (and thus also the total number of nodes). This will be used
// to allocate space for each level of the tree.
//
// The tree is **laid out in memory from bottom (leaves) to top (root)**.
// `_levelBounds` is an array that stores the offset within the coordinates
// array where each level **ends**. The first element of `_levelBounds` is
// `n * 4`, meaning that the slice of the coordinates array from `0` to `n *
// 4` contains the bottom (leaves) of the tree.
//
// Then the slice of the coordinates array from `_levelBounds[0]` to
// `_levelBounds[1]` represents the boxes of the first level of the tree,
// that is, the direct parent nodes of the leaves. And so on,
// `_levelBounds[1]` to `_levelBounds[2]` represents the nodes at level 2,
// the grandparent nodes of the leaf nodes.
//
// So for example if `numItems` is 10,000 and `nodeSize` is 16,
// `levelBounds` will be:
// ```
// [40000, 42500, 42660, 42672, 42676]
// ```
//
// That is:
// - The first 40,000 elements (10,000 nodes) are coordinates of the leaf nodes (4 coordinates per node).
// - 2,500 coordinates and 625 nodes one level higher
// - 160 coordinates and 40 nodes two levels higher
// - 12 coordinates and 3 nodes three levels higher
// - 1 root node four levels higher, at the top of the tree, with a single 4-coordinate box.
//
// Keep in mind that because this is a _packed_ tree, every node within a
// single level will be **completely full** (contain exactly `nodeSize`
// elements) except for the last node.
//
// `numNodes` ends up as the total number of nodes in the tree, including
// all leaves.
let n = numItems;
let numNodes = n;
this._levelBounds = [n * 4];
do {
n = Math.ceil(n / this.nodeSize);
numNodes += n;
this._levelBounds.push(numNodes * 4);
} while (n !== 1);
// Flatbush doesn't manage references to objects directly. Rather, it
// operates in terms of the _insertion index_. Flatbush only maintains these
// insertion indices.
//
// `IndexArrayType` will be used to create the `indices` array, to store the
// ordering of the input boxes. If possible, a `Uint16Array` will be used to
// save space. If the values would overflow a `Uint16Array`, a `Uint32Array`
// is used. The largest number a `Uint16Array` can hold is `2^16 = 65,536`.
// Since each node holds four values, this gets divided by `4` and `65,536 /
// 4 = 16,384`. This is why the check here is for 16,384.
this.ArrayType = ArrayType;
this.IndexArrayType = numNodes < 16384 ? Uint16Array : Uint32Array;
// In order to accurately interpret the index from raw bytes, we need to
// record in the header which index type we're using.
const arrayTypeIndex = ARRAY_TYPES.indexOf(this.ArrayType);
// The number of bytes needed to store all box coordinate data for all
// nodes.
const nodesByteSize = numNodes * 4 * this.ArrayType.BYTES_PER_ELEMENT;
if (arrayTypeIndex < 0) {
throw new Error(`Unexpected typed array class: ${ArrayType}.`);
}
// This `if` statement switches on whether the `data` argument was passed in
// (i.e. this constructor is called by `Flatbush.from`). If `data` exists,
// this will create the `_boxes` and `_indices` arrays as **views** on the
// existing `ArrayBuffer` without allocating any new memory.
if (data && data.byteLength !== undefined && !data.buffer) {
this.data = data;
// `this._boxes` is created as a view on `this.data` starting after the
// header (8 bytes) and with `numNodes * 4` elements. `this._indices` is
// created as a view on `this.data` starting after the end of
// `this._boxes` and containing `numNodes` elements.
this._boxes = new this.ArrayType(this.data, 8, numNodes * 4);
this._indices = new this.IndexArrayType(
this.data,
8 + nodesByteSize,
numNodes
);
// The coordinate data in the `_boxes` array is stored from the leaves up.
// So the last box is the single node that contains all data. The index of
// the last box is the four values in `_boxes` up to `numNodes * 4`.
//
// This sets the total bounds on the `Flatbush` instance to the extent of
// that box.
//
// We also set `this._pos` as the total number of coordinates. `this._pos`
// is a pointer into the `this._boxes` array, used while adding new boxes
// to the instance. This also allows for inferring whether the `Flatbush`
// instance has been "finished" (sorted) or not.
//
// If the instance has already been sorted, adding more data is not
// allowed. Conversely, if the instance has not yet been sorted, query
// methods may not be called.
this._pos = numNodes * 4;
this.minX = this._boxes[this._pos - 4];
this.minY = this._boxes[this._pos - 3];
this.maxX = this._boxes[this._pos - 2];
this.maxY = this._boxes[this._pos - 1];
// In the `else` case, a `data` buffer was not provided, so we need to
// allocate data for the backing buffer.
//
// `this.data` is a new `ArrayBuffer` with space for the header plus all
// box data plus all index data. Then `this._boxes` is created as a view
// on `this.data` starting after the header and with `numNodes * 4`
// elements. `this._indices` is created as a view on `this.data` starting
// after the end of `this._boxes`.
} else {
this.data = new ArrayBufferType(
8 + nodesByteSize + numNodes * this.IndexArrayType.BYTES_PER_ELEMENT
);
this._boxes = new this.ArrayType(this.data, 8, numNodes * 4);
this._indices = new this.IndexArrayType(
this.data,
8 + nodesByteSize,
numNodes
);
// We set `this._pos` to 0. This means that no boxes have
// yet been added to the index, and it tells any query methods to throw
// until `finish` has been called.
this._pos = 0;
// The RTree needs to maintain its total bounds (the global bounding box
// of all values) in order to set the bounds for the hilbert space.
//
// We initialize these bounds to `Infinity` values that will be corrected
// when adding data. The minimum x/y of any box will be less than positive
// infinity and the maximum x/y of any box will be greater than negative
// infinity. The `add()` call will adjust these bounds if necessary.
this.minX = Infinity;
this.minY = Infinity;
this.maxX = -Infinity;
this.maxY = -Infinity;
// Next we set the header values with metadata from the instance.
//
// The first byte, `0xfb` is a "magic byte", used as basic validation that
// this buffer is indeed a Flatbush index.
//
// Since `arrayTypeIndex` is known to have only 9 values, it doesn't need
// to take up a a full byte. Here it shares a single byte with the
// Flatbush format version.
new Uint8Array(this.data, 0, 2).set([
0xfb,
(VERSION << 4) + arrayTypeIndex,
]);
new Uint16Array(this.data, 2, 1)[0] = nodeSize;
new Uint32Array(this.data, 4, 1)[0] = numItems;
}
// We initialize a [priority
// queue](https://en.wikipedia.org/wiki/Priority_queue) used for
// k-nearest-neighbors queries in the `neighbors` method.
/** @type FlatQueue<number> */
this._queue = new FlatQueue();
}
// ### Flatbush.add
//
// Add a given rectangle to the index.
/**
* Add a given rectangle to the index.
* @param {number} minX
* @param {number} minY
* @param {number} maxX
* @param {number} maxY
* @returns {number} A zero-based, incremental number that represents the newly added rectangle.
*/
add(minX, minY, maxX, maxY) {
// We need to know the insertion index of the box presently being added.
//
// In the constructor, `this._pos` is initialized to `0` and in each call
// to `add()`, `this._pos` is incremented by `4`. Dividing `this._pos` by
// `4` retrieves the 0-based index of the box about to be inserted.
//
// This bit shift:
// ```js
// this._pos >> 2
// ```
// is equivalent to
// ```js
// this._pos / 4
// ```
// but the bit shift is faster because it informs the JS engine that we
// expect the output to be an integer.
//
// Because there are 4 values for each item, using `_pos` is an easy way to
// infer the insertion index without having to maintain a separate counter.
const index = this._pos >> 2;
const boxes = this._boxes;
// We set the value of `this._indices` at the current index's position to
// the value of the current index. So `this._indices` stores the insertion
// index of each box.
//
// Later, inside the `finish` method, we'll sort the boxes by their hilbert
// value and jointly reorder the values in `_indices`, ensuring that we keep
// the indices and boxes in sync.
//
// This means that for any box representing a leaf node at position `i`
// (where `i` points to a _box_ not a _coordinate_ inside a box),
// `this._indices[i]` retrieves the original insertion-order index of that
// box.
this._indices[index] = index;
// We set the coordinates of this box into the `boxes` array. Note that
// `this._pos++` is evaluated **after** the box index is set. So
//
// ```js
// boxes[this._pos++] = minX;
// ```
//
// is equivalent to
//
// ```js
// boxes[this._pos] = minX;
// this._pos += 1;
// ```
boxes[this._pos++] = minX;
boxes[this._pos++] = minY;
boxes[this._pos++] = maxX;
boxes[this._pos++] = maxY;
// Update the total bounds of this instance if this rectangle is larger than
// the existing bounds.
if (minX < this.minX) this.minX = minX;
if (minY < this.minY) this.minY = minY;
if (maxX > this.maxX) this.maxX = maxX;
if (maxY > this.maxY) this.maxY = maxY;
return index;
}
// ### Flatbush.finish
//
// A spatial index needs to sort input data so that elements can be found
// quickly later.
//
// The simplest way of sorting values is on a single dimension, where if `a`
// is less than `b`, `a` should be placed before `b`. But that presents a
// problem because we have _two_ dimensions, not one.
//
// One way to solve this is
// to map values from two-dimensional space into a one-dimensional range. A
// common way to perform this mapping is by using [space-filling
// curves](https://en.wikipedia.org/wiki/Space-filling_curve). In our case,
// we'll use a [hilbert curve](https://en.wikipedia.org/wiki/Hilbert_curve), a
// specific type of space-filling curve that's useful with geospatial data
// because it generally preserves locality.
//
// <div style="text-align: center;">
// <img src="https://upload.wikimedia.org/wikipedia/commons/7/7c/Hilbert-curve_rounded-gradient-animated.gif" width="260">
// </div>
//
// > First six iterations of the Hilbert curve, [from
// > Wikipedia](https://en.wikipedia.org/wiki/Hilbert_curve#/media/File:Hilbert-curve_rounded-gradient-animated.gif),
// > CC BY-SA.
//
// Note that using a space-filling curve to map values into one dimension
// isn't the only way of sorting multi-dimensional data. There are other
// algorithms, like
// [sort-tile-recursive (STR)](https://ia600900.us.archive.org/27/items/nasa_techdoc_19970016975/19970016975.pdf)
// that first sort into groups on one dimension, then the other, recursively.
//
// While this canonical Flatbush implementation chooses to sort based on
// hilbert value, that's actually not necessary to maintain ABI-stability: any
// two-dimensional sort will work. My [Rust
// port](https://github.com/kylebarron/geo-index) defines an [extensible
// trait](https://docs.rs/geo-index/latest/geo_index/rtree/sort/trait.Sort.html)
// for sorting and provides both hilbert and STR sorting implementations.
//
/** Perform indexing of the added rectangles. */
finish() {
// Recall that in the `add` method, we increment `this._pos` by `1` for each
// coordinate of each box. Here we validate that we've added the same number
// of boxes as we provisioned in the constructor. Remember that `>> 2` is
// equivalent to `/ 4`.
if (this._pos >> 2 !== this.numItems) {
throw new Error(
`Added ${this._pos >> 2} items when expected ${this.numItems}.`
);
}
const boxes = this._boxes;
// If the total number of items in the tree is less than the node size, that
// means we'll only have a single non-leaf node in the tree. In that case,
// we don't even need to sort by hilbert value. We can just assign the total
// bounds of the tree to the following box and return.
if (this.numItems <= this.nodeSize) {
boxes[this._pos++] = this.minX;
boxes[this._pos++] = this.minY;
boxes[this._pos++] = this.maxX;
boxes[this._pos++] = this.maxY;
return;
}
// Using the total bounds of the tree, we compute the height and width of
// the hilbert space and instantiate space for the hilbert values.
const width = this.maxX - this.minX || 1;
const height = this.maxY - this.minY || 1;
const hilbertValues = new Uint32Array(this.numItems);
const hilbertMax = (1 << 16) - 1;
// Map box centers into Hilbert coordinate space and calculate Hilbert
// values using the `hilbert` function defined below.
//
// This for loop iterates over every box. At the beginning of each loop
// iteration, `pos` is equal to `i * 4`.
for (let i = 0, pos = 0; i < this.numItems; i++) {
const minX = boxes[pos++];
const minY = boxes[pos++];
const maxX = boxes[pos++];
const maxY = boxes[pos++];
const x = Math.floor(
(hilbertMax * ((minX + maxX) / 2 - this.minX)) / width
);
const y = Math.floor(
(hilbertMax * ((minY + maxY) / 2 - this.minY)) / height
);
hilbertValues[i] = hilbert(x, y);
}
// Up until this point, the values in `boxes` and in `this._indices` are
// still in _insertion order_. We now jointly sort the boxes and indices
// according to their hilbert values.
sort(
hilbertValues,
boxes,
this._indices,
0,
this.numItems - 1,
this.nodeSize
);
// Now the leaves of the tree have been sorted, but we still need to
// construct the rest of the tree.
//
// For each level of the tree, we need to generate parent nodes that contain
// `nodeSize` child nodes. We do this starting from the leaves, working from
// the bottom up.
//
// Here the iteration variable, `i`, refers to the positional **tree
// level**, which is also an index into the `this._levelBounds` array.
//
// - When `i == 0`, we're iterating over the original geometry boxes.
// - When `i == 1`, we're iterating over the parent nodes one level up that
// we previously generated from the first loop iteration.
// - And so on, `i` represents the number of parents from the original
// geometry boxes.
//
// As elsewhere, `pos` is a local variable that points to a coordinate
// within a box at the given level `i` of the tree. Note this syntax: it's
// unusual for _two_ variables to be defined in the `for` loop binding: here
// both `i` and `pos` are only defined within the scope of this loop. But
// only `i` is incremented by the loop. `pos` is incremented separately
// within the body of the loop (four times for each box).
for (let i = 0, pos = 0; i < this._levelBounds.length - 1; i++) {
// Next, we want to scan through all nodes at this level of the tree,
// generating a parent node for each **group** of consecutive `nodeSize`
// boxes.
//
// Here, `end` is the index of the first coordinate at the _next level
// above the current level_. So the range up to `end` includes all
// coordinates at the current tree level.
//
// We then scan over all of these box coordinates in this while loop.
const end = this._levelBounds[i];
while (pos < end) {
// We record the `pos` pointing to the first element of the first box in
// each **group** of consecutive `nodeSize` boxes, in order to later
// record it in the `indices` array.
const nodeIndex = pos;
// Calculate the bounding box for the new parent node.
//
// We initialize the bounding box to the first box and then expand the
// box while looping over the rest of the elements that together are the
// children of this parent node we're creating.
//
// Note the `j = 1` in the loop; this is a small optimization because we
// initialize the `node*` variables to the first element, rather than
// initializing with positive and negative infinity.
//
// Also note that in the loop we constrain the iteration variable `j` to
// be both less than the node size and for `pos < end`. The former
// ensures we have only a maximum of `nodeSize` elements informing the
// parent node's boundary. The latter ensures that we don't accidentally
// overflow the current tree level.
let nodeMinX = boxes[pos++];
let nodeMinY = boxes[pos++];
let nodeMaxX = boxes[pos++];
let nodeMaxY = boxes[pos++];
for (let j = 1; j < this.nodeSize && pos < end; j++) {
nodeMinX = Math.min(nodeMinX, boxes[pos++]);
nodeMinY = Math.min(nodeMinY, boxes[pos++]);
nodeMaxX = Math.max(nodeMaxX, boxes[pos++]);
nodeMaxY = Math.max(nodeMaxY, boxes[pos++]);
}
// Now that we know the extent of the parent node, we can add the new
// node's information to the tree data.
//
// Recall that `nodeIndex`, stored above, points to the first element of
// the first box in each group of consecutive `nodeSize` nodes.
//
// The `nodeIndex` is always a multiple of 4 because there are 4
// coordinates in each 2D box. This means we can divide by 4 to store
// the node index information more compactly. Again, we use `>> 2`
// instead of `/ 4` as a performance optimization.
//
// When we're at the base (leaf) level of the tree, `nodeIndex`
// represents the insertion index of the first box in this group.
//
// Similarly, when we're at higher levels of the tree, `nodeIndex`
// represents the offset of the first box in this group.
//
// These two facts allow us to traverse the tree in a search query, as
// we'll see below in `Flatbush.search`.
//
// Note that we're setting the parent node into `this._indices` and
// `boxes` according to **`this._pos`**, which **is a different variable
// than the local `pos` variable that's incremented in this loop.**
// `this._pos` is a **global** counter that keeps track of the new nodes
// we're **inserting** into the index. In contrast, `pos` is a **local**
// counter for aggregating the information for the parent node.
//
// Impressively, these loops do all the hard work of constructing the
// tree! That's it! The structure of the tree and the coordinates of all
// the parent nodes are now fully contained within `this._indices` and
// `boxes`, which are both views on `this.data`!
this._indices[this._pos >> 2] = nodeIndex;
boxes[this._pos++] = nodeMinX;
boxes[this._pos++] = nodeMinY;
boxes[this._pos++] = nodeMaxX;
boxes[this._pos++] = nodeMaxY;
}
}
}
// ### Flatbush.search
//
// The primary API for searching an index by a bounding box query.
/**
* Search the index by a bounding box.
* @param {number} minX
* @param {number} minY
* @param {number} maxX
* @param {number} maxY
* @param {(index: number) => boolean} [filterFn] An optional function for filtering the results.
* @returns {number[]} An array of indices of items intersecting or touching the given bounding box.
*/
search(minX, minY, maxX, maxY, filterFn) {
// A simple check to ensure that this index has been finished/sorted.
if (this._pos !== this._boxes.length) {
throw new Error("Data not yet indexed - call index.finish().");
}
// `nodeIndex` is initialized to the root node, the parent of all other
// nodes. Since the tree is laid out from bottom to top, the root node is
// the last node in `this._boxes`. We subtract `4` so that `nodeIndex`
// points to the _first_ coordinate of the box.
//
// Note that `nodeIndex` will always point to the **first box** within a
// group of (usually `nodeSize`) boxes.
//
// `queue` holds integers that represent the position within `this._indices`
// of intermediate nodes that still need to be searched. That is, `queue`
// represents nodes whose parents intersected the search predicate.
//
// `results` holds integers that represent the insertion indexes that match
// the search predicate.
/** @type number | undefined */
let nodeIndex = this._boxes.length - 4;
const queue = [];
const results = [];
// Now we have our search loop.
//
// ```js
// while (nodeIndex !== undefined)
// ```
//
// will be `true` as long as there are still elements remaining in `queue`
// (note that the last line of the `while` loop is `nodeIndex =
// queue.pop();`).
while (nodeIndex !== undefined) {
// Find the end index of the current node.
//
// Most of the time, the node contains `nodeSize` elements. At the end of
// each level, the node will contain fewer elements. In the first case,
// the end of the node will be the current index plus 4 coordinates for
// each box. We check if we're in the second case by checking the value of
// `this._levelBounds` for the current level of the tree.
const end = Math.min(
nodeIndex + this.nodeSize * 4,
upperBound(nodeIndex, this._levelBounds)
);
// Then we search through each box of the current node, checking whether
// each matches our predicate. The loop ranges from the first node of the
// level (`nodeIndex`) to the last (`end`). We increment `pos` by `4` for
// each loop step because there are 4 coordinates.
for (let /** @type number */ pos = nodeIndex; pos < end; pos += 4) {
// Check if the current box **does not intersect** with query box. If
// the current box does not intersect, then we can continue on to the
// next element of this node.
//
// If we reach past these four lines, then we know the current box
// **does intersect** with the query box.
if (maxX < this._boxes[pos]) continue; // maxX < nodeMinX
if (maxY < this._boxes[pos + 1]) continue; // maxY < nodeMinY
if (minX > this._boxes[pos + 2]) continue; // minX > nodeMaxX
if (minY > this._boxes[pos + 3]) continue; // minY > nodeMaxY
// `pos` is a pointer to the first coordinate of the given box.
// Recall in `Flatbush.finish` that we set:
//
// ```js
// this._indices[this._pos >> 2] = nodeIndex;
// ```
//
// This stored a mapping from parent to child node, where `this._pos >>
// 2` was the parent node and `nodeIndex` was the child node. Now is the
// time when we want to use this mapping.
//
// - If the current box _is not_ a leaf, `index` is the `pos` of
// the first box of the child node. This child is a node that we
// should evaluate later, so we add it to the `queue` array.
// - If the current box _is_ a leaf, then `index` is the original
// insertion index, and we add it to the `results` array.
//
// Again, `pos >> 2` is a faster way of expressing `pos / 4`, where we
// can inform the JS engine that the output will be an integer.
//
// I believe `| 0` is just a JS engine optimization that doesn't affect
// the output of the operation?
//
// Then we can add the `index` to either the intermediate `queue` or
// `results` arrays as necessary.
const index = this._indices[pos >> 2] | 0;
if (nodeIndex >= this.numItems * 4) {
queue.push(index); // node; add it to the search queue
} else if (filterFn === undefined || filterFn(index)) {
results.push(index); // leaf item
}
}
// Set the `nodeIndex` to the next item in the `queue` so that we continue
// the `while` loop.
nodeIndex = queue.pop();
}
return results;
}
// ### Flatbush.neighbors
//
// The primary API for searching an index by nearest neighbors to a point.
//
// This has significant overlap with `Flatbush.search`, and so we'll only
// touch on the differences.
/**
* Search items in order of distance from the given point.
* @param {number} x
* @param {number} y
* @param {number} [maxResults=Infinity]
* @param {number} [maxDistance=Infinity]
* @param {(index: number) => boolean} [filterFn] An optional function for filtering the results.
* @returns {number[]} An array of indices of items found.
*/
neighbors(x, y, maxResults = Infinity, maxDistance = Infinity, filterFn) {
if (this._pos !== this._boxes.length) {
throw new Error("Data not yet indexed - call index.finish().");
}
// Instead of using an array as a queue, here we use a priority queue. This
// is a data structure that maintains the queue in sorted order, and which
// allows us to ensure that the first element of the queue is indeed the
// closest to the provided point.
/** @type number | undefined */
let nodeIndex = this._boxes.length - 4;
const q = this._queue;
const results = [];
const maxDistSquared = maxDistance * maxDistance;
outer: while (nodeIndex !== undefined) {
const end = Math.min(
nodeIndex + this.nodeSize * 4,
upperBound(nodeIndex, this._levelBounds)
);
// Add child nodes to the queue.
//
// `dx` and `dy` are computed as the _one-dimensional_ change in `x` and
// `y` needed to reach one of the sides of the box from the query point.
// Then `dist` is the squared distance to reach the corner of the box
// closest to the query point.
//
// If this distance is less than the provided maximum distance, we add it
// to the queue. Since we add both intermediate nodes _and_ results to the
// same queue, we need a way to distinguish the two. When the `index`
// represents an intermediate node, we multiply by two (i.e. `<< 1`) so
// that we have an even id. When the `index` represents a leaf item, we
// multiply by two and then add one (i.e. `(<< 1) + 1`), so that we have
// an odd id.
for (let pos = nodeIndex; pos < end; pos += 4) {
const index = this._indices[pos >> 2] | 0;
const dx = axisDist(x, this._boxes[pos], this._boxes[pos + 2]);
const dy = axisDist(y, this._boxes[pos + 1], this._boxes[pos + 3]);
const dist = dx * dx + dy * dy;
if (dist > maxDistSquared) continue;
if (nodeIndex >= this.numItems * 4) {
q.push(index << 1, dist); // node (use even id)
} else if (filterFn === undefined || filterFn(index)) {
q.push((index << 1) + 1, dist); // leaf item (use odd id)
}
}
// Now that we've added all child nodes to the queue, we can move queue
// items to the results array and/or break out of the outer loop
// completely.
//
// Since this queue is a priority queue, we can be assured that the first
// item of the queue is the closest to the query point. The nearest corner
// of the box of that item is closer than any other node or result.
//
// While the `queue` is non-empty and the first (closest) item in the
// queue is a leaf item (odd), if that item's distance is less than the
// maximum query distance, we can break out of the outer loop, since there
// cannot be any more nodes that are closer than that distance. If the
// item's distance is less than the maximum query distance, we add it to
// the results array because it must be the next closest result.
//
// If the first (closest) item of the `queue` is an intermediate node (not
// odd), then we need to evaluate the items of that node before knowing
// which one is the next closest. In this case, the `while` condition is
// `false`, and we set the `nodeIndex` to that intermediate node for the
// next iteration of the outer `while` loop.
while (q.length && q.peek() & 1) {
const dist = q.peekValue();
if (dist > maxDistSquared) break outer;
results.push(q.pop() >> 1);
if (results.length === maxResults) break outer;
}
nodeIndex = q.length ? q.pop() >> 1 : undefined;
}
// We clear the queue because this queue is reused for all queries in this
// index.
q.clear();
return results;
}
}
// The remaining code is "just" utility functions.
//
// I won't document these in detail because they tend to be self-explanatory or
// easily found online, and this post is focused more on the R-tree
// implementation itself.
//
// `axisDist`: 1D distance from a value to a range.
/**
* 1D distance from a value to a range.
* @param {number} k
* @param {number} min
* @param {number} max
*/
function axisDist(k, min, max) {
  // Below the range: distance up to the lower edge.
  if (k < min) {
    return min - k;
  }
  // Inside the range (edges included): no distance at all.
  if (k <= max) {
    return 0;
  }
  // Otherwise measure from the upper edge.
  return k - max;
}
// `upperBound`: Binary search for the first value in the array that is bigger
// than the given value. (If no value is bigger, the last element is returned.)
/**
* Binary search for the first value in the array that is bigger than the given value.
* @param {number} value
* @param {number[]} arr
*/
function upperBound(value, arr) {
  // Narrow the window [lo, hi] until it holds a single candidate.
  let lo = 0;
  for (let hi = arr.length - 1; lo < hi; ) {
    const mid = (lo + hi) >> 1;
    const midIsBigger = arr[mid] > value;
    if (midIsBigger) {
      // `mid` may itself be the answer, so keep it inside the window.
      hi = mid;
    } else {
      lo = mid + 1;
    }
  }
  // When no element is bigger than `value`, this is the last element
  // (or `undefined` for an empty array).
  return arr[lo];
}
// `sort`: Custom quicksort that partially sorts bbox data alongside the hilbert values.
/**
* Custom quicksort that partially sorts bbox data alongside the hilbert values.
* @param {Uint32Array} values
* @param {InstanceType<TypedArrayConstructor>} boxes
* @param {Uint16Array | Uint32Array} indices
* @param {number} left
* @param {number} right
* @param {number} nodeSize
*/
function sort(values, boxes, indices, left, right, nodeSize) {
  // The right-hand partition is handled by looping instead of a second
  // recursive call; `start` plays the role of `left` for that partition.
  let start = left;
  for (;;) {
    // Stop as soon as the range does not extend past the node containing
    // `start`: the order of items inside a single node is left unsorted.
    if (Math.floor(start / nodeSize) >= Math.floor(right / nodeSize)) return;

    // Partition around the middle element, scanning inward from both ends.
    const pivot = values[(start + right) >> 1];
    let lo = start - 1;
    let hi = right + 1;
    for (;;) {
      do {
        lo++;
      } while (values[lo] < pivot);
      do {
        hi--;
      } while (values[hi] > pivot);
      if (lo >= hi) break;
      // Keep the value, its box and its index moving together.
      swap(values, boxes, indices, lo, hi);
    }

    // Left partition first, then continue with the right partition.
    sort(values, boxes, indices, start, hi, nodeSize);
    start = hi + 1;
  }
}
// `swap`: Swap two values and two corresponding boxes.
/**
* Swap two values and two corresponding boxes.
* @param {Uint32Array} values
* @param {InstanceType<TypedArrayConstructor>} boxes
* @param {Uint16Array | Uint32Array} indices
* @param {number} i
* @param {number} j
*/
function swap(values, boxes, indices, i, j) {
  // Exchange the two sort keys.
  const heldValue = values[i];
  values[i] = values[j];
  values[j] = heldValue;

  // Exchange the two boxes; each box occupies 4 consecutive coordinates.
  const boxI = 4 * i;
  const boxJ = 4 * j;
  for (let offset = 0; offset < 4; offset++) {
    const heldCoord = boxes[boxI + offset];
    boxes[boxI + offset] = boxes[boxJ + offset];
    boxes[boxJ + offset] = heldCoord;
  }

  // Exchange the two index entries so they stay aligned with their boxes.
  const heldIndex = indices[i];
  indices[i] = indices[j];
  indices[j] = heldIndex;
}
// `hilbert`: compute hilbert codes.
//
// This is the function that takes a position in 2D space, `x` and `y`, and
// returns the hilbert value for that position.
//