diff --git a/vitalik_merkle_optimizations/LICENSE b/vitalik_merkle_optimizations/LICENSE
new file mode 100644
index 0000000..688262d
--- /dev/null
+++ b/vitalik_merkle_optimizations/LICENSE
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+ Copyright (c) 2015-2018 Vitalik Buterin
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/vitalik_merkle_optimizations/README.md b/vitalik_merkle_optimizations/README.md
new file mode 100644
index 0000000..2893294
--- /dev/null
+++ b/vitalik_merkle_optimizations/README.md
@@ -0,0 +1,29 @@
+# Sparse Merkle Tree
+## Client Side Optimizations (Vitalik Buterin)
+
+This code was copied from Vitalik's research [github repo](https://github.com/ethereum/research/tree/master/sparse_merkle_tree/) in September 2024.
+
+The code demonstrates a number of optimizations applicable to the client-side Sparse Merkle Tree implementation. These optimizations are discussed in Vitalik's post [here](https://ethresear.ch/t/optimizing-sparse-merkle-trees/3751/5).
+
+The only changes made are cosmetic (mostly comments) that describe the optimization logic.
+
+In all, the folder includes 3 SMT implementations:
+* [new_bintrie.py](./new_bintrie.py) - This is a naive SMT implementation with a compress/decompress function. This replaces zero subtree hashes from the proof with a set of flags, recording the tree levels from which the hashes were removed.
+
+* [new_bintrie_optimized.py](./new_bintrie_optimized.py) - Implements the optimization discussed in Vitalik's post. This primarily involves short-cutting the tree storage as soon as a sub-tree is exclusively composed of a single non-zero leaf. Whereas normally a tree is stored as a list of mappings:
+ `parent -> (left_hash, right_hash)`
+
+ ...with this optimization a sub-tree having a single non-zero leaf is terminated with an entry of this type:
+ `parent -> (flag, path, hash)`
+
+ `flag` - identifies that this is a special "tree termination" encoding.
+ `path` - identifies the address/index of the non-zero leaf within the subtree.
+ `hash` - the hash of the non-zero leaf
+
+ This information allows us to stop traversing the tree and immediately determine whether or not the traversal will hit the non-zero leaf.
+
+* [new_bintrie_hex.py](./new_bintrie_hex.py) - Implements the single non-zero leaf short-cut optimization just discussed, plus a compression trick. The compression reduces the number of levels stored by a factor of 4. Instead of storing a full binary tree structure, a parent is made to point at the set of child-nodes found 4 levels underneath it. So the root will point at the nodes at level 4. Level 4 nodes would then point at their child nodes 4 levels underneath them (found at level 8 of the main tree).
+
+ This means that a parent node will point at `2^4 = 16` child-node hashes.
+
+ This is only a client-side optimization. We still assume operation against a regular 256-level Sparse Merkle Tree. The only difference is the change in storage structure. The parent node hash is still computed by recursively hashing sibling pairs. However, this is done for four levels.
\ No newline at end of file
diff --git a/vitalik_merkle_optimizations/bintrie_test.py b/vitalik_merkle_optimizations/bintrie_test.py
index e0c2460..be229cb 100644
--- a/vitalik_merkle_optimizations/bintrie_test.py
+++ b/vitalik_merkle_optimizations/bintrie_test.py
@@ -13,6 +13,8 @@
r = t1.update(d, r, k, k)
print("Naive bintree time to update: %.4f" % (time.time() - a))
print("Root: %s" % binascii.hexlify(r))
+print()
+print()
d = t2.EphemDB()
r = t2.new_tree(d)
@@ -22,12 +24,15 @@
print("DB-optimized bintree time to update: %.4f" % (time.time() - a))
print("Root: %s" % binascii.hexlify(r))
print("Writes: %d, reads: %d" % (d.writes, d.reads))
+
d.reads = 0
for k in keys[:500]:
assert t2.get(d, r, k) == k
for k in keys[-500:]:
assert t2.get(d, r, k) == b'\x00' * 32
print("Reads: %d" % d.reads)
+print()
+print()
d = t3.EphemDB()
r = t3.new_tree(d)
diff --git a/vitalik_merkle_optimizations/new_bintrie.py b/vitalik_merkle_optimizations/new_bintrie.py
index a12c4c5..bec070b 100644
--- a/vitalik_merkle_optimizations/new_bintrie.py
+++ b/vitalik_merkle_optimizations/new_bintrie.py
@@ -1,5 +1,9 @@
from ethereum.utils import sha3, encode_hex
+# Comments by Alexander Zammit
+# Key:Value mapping,
+# key = parent hash
+# value = concatenated siblings
class EphemDB():
def __init__(self, kv=None):
self.kv = kv or {}
@@ -13,10 +17,18 @@ def put(self, k, v):
def delete(self, k):
del self.kv[k]
+# Initializes the zero subtree hashes
+# Ultimately element zero will be the root
+# ...and the last element zerohashes[256] will be the zero leaf.
+# zerohashes[256] = 0x000000000...
+# zerohashes[255] = sha3(zerohashes[256] + zerohashes[256])
+# zerohashes[254] = sha3(zerohashes[255] + zerohashes[255])
zerohashes = [b'\x00' * 32]
for i in range(256):
zerohashes.insert(0, sha3(zerohashes[0] + zerohashes[0]))
+# Initializes the db (an instance of EphemDB)
+# With an empty, all-zero tree having 256 levels
def new_tree(db):
h = b'\x00' * 32
for i in range(256):
@@ -25,12 +37,21 @@ def new_tree(db):
h = newh
return h
+# Application is working with an array of bytes
+# A 256-bit path is constructed from a key composed
+# of a 32x1-byte array.
+# Function simply converts from 32-bytes array to
+# one 256-bit value
def key_to_path(k):
o = 0
for c in k:
o = (o << 8) + c
return o
+# Traverse tree from root to the end of the path
+# by following the provided path. The path is just
+# a set of left/right flags telling us which sibling
+# to pick. Returns node value at the path end.
def descend(db, root, *path):
v = root
for p in path:
@@ -40,6 +61,9 @@ def descend(db, root, *path):
v = db.get(v)[:32]
return v
+# Similar to descend, but this time the path
+# is derived from the key, hence we always end
+# up at the leaf level.
def get(db, root, key):
v = root
path = key_to_path(key)
@@ -51,6 +75,14 @@ def get(db, root, key):
path <<= 1
return v
+# Update leaf value.
+# The function adds all the new nodes
+# that result from changing a leaf value.
+# The function does not delete orphaned nodes.
+# Function performs two passes. In the first
+# pass it identifies the siblings relevant
+# to computing the new node hashes.
+# In the second pass it adds the new nodes.
def update(db, root, key, value):
v = root
path = path2 = key_to_path(key)
@@ -76,6 +108,10 @@ def update(db, root, key, value):
sidenodes.pop()
return v
+# Returns an array of siblings necessary to
+# proving membership. Siblings are collected
+# by traversing from root to leaf and the
+# returned array is ordered [root-1 -> leaf]
def make_merkle_proof(db, root, key):
v = root
path = key_to_path(key)
@@ -90,6 +126,8 @@ def make_merkle_proof(db, root, key):
path <<= 1
return sidenodes
+# Verify proof by recomputing root from the
+# proof which is just an array of siblings.
def verify_proof(proof, root, key, value):
path = key_to_path(key)
v = value
@@ -102,6 +140,9 @@ def verify_proof(proof, root, key, value):
v = newv
return root == v
+# Compress the proof by removing all zero hashes
+# and adding a 256-bit (32-byte) bitfield to identify
+# which of the elements were removed.
def compress_proof(proof):
bits = bytearray(32)
oproof = b''
@@ -112,6 +153,11 @@ def compress_proof(proof):
oproof += p
return bytes(bits) + oproof
+# Recover full proof by re-inserting zero hashes
+# This uses the 256-bit flags to identify the missing
+# hashes. Function outputs a new proof which is a merge
+# of the compressed proof hashes and the newly inserted
+# zero hashes
def decompress_proof(oproof):
proof = []
bits = bytearray(oproof[:32])
diff --git a/vitalik_merkle_optimizations/new_bintrie_hex.py b/vitalik_merkle_optimizations/new_bintrie_hex.py
index 354e0cb..af9901b 100644
--- a/vitalik_merkle_optimizations/new_bintrie_hex.py
+++ b/vitalik_merkle_optimizations/new_bintrie_hex.py
@@ -37,6 +37,28 @@ def path_to_key(k):
return (k & tt256m1).to_bytes(32, 'big')
# Read a key from a given tree
+#
+# This is not a binary tree. Instead each parent has 16 children.
+# This means that traversing one tree level is equivalent to
+# jumping 4 levels in a binary tree.
+#
+# The path still includes all levels we would have in a binary tree
+# however each four bits is a selector within one level.
+#
+# Thus the loop increments by 4 ==> for i in range(0, 256, 4)
+#
+# Traversing child nodes involves extracting level selector:
+# index = (path >> 252) & 15
+#
+# ...and extracting one of 16, 32-byte child hashes:
+# v = child[32*index: 32*index+32]
+#
+# [___]
+# |
+# ------------------........-----------------
+# | | | | | |
+# [___] [___] [___] [___] [___] [___]
+#
def get(db, root, key):
v = root
path = key_to_path(key)
@@ -65,6 +87,13 @@ def make_single_key_hash(path, depth, value):
return sha3(make_single_key_hash(path << 1, depth + 1, value) + zerohashes[depth+1])
# Hash together 16 elements
+#
+# The function is computing the hash of a 4-level binary tree
+# vals is an array that would initially contain 16 elements
+# vals is reallocated 4 times, each round halving its size.
+#
+# In this manner we are hashing adjacent siblings moving from
+# leafs to root
def hash_16_els(vals):
assert len(vals) == 16
for _ in range(4):
diff --git a/vitalik_merkle_optimizations/new_bintrie_optimized.py b/vitalik_merkle_optimizations/new_bintrie_optimized.py
index 9748b5c..20a3bb8 100644
--- a/vitalik_merkle_optimizations/new_bintrie_optimized.py
+++ b/vitalik_merkle_optimizations/new_bintrie_optimized.py
@@ -1,5 +1,12 @@
from ethereum.utils import sha3, encode_hex
+# Comments by Alexander Zammit
+# Key:Value mapping,
+# key = parent hash
+# value = concatenated siblings
+#
+# Class also keeps count of the number
+# of read/write operations.
class EphemDB():
def __init__(self, kv=None):
self.reads = 0
@@ -17,26 +24,59 @@ def put(self, k, v):
def delete(self, k):
del self.kv[k]
-# Hashes of empty subtrees
+# Initializes the zero subtree hashes
+# Ultimately element zero will be the root
+# ...and the last element zerohashes[256] will be the zero leaf.
+# zerohashes[256] = 0x000000000...
+# zerohashes[255] = sha3(zerohashes[256] + zerohashes[256])
+# zerohashes[254] = sha3(zerohashes[255] + zerohashes[255])
zerohashes = [b'\x00' * 32]
for i in range(256):
zerohashes.insert(0, sha3(zerohashes[0] + zerohashes[0]))
# Create a new empty tree
+#
+# New trees are empty hence we just return
+# the zero hash for the root.
def new_tree(db):
return zerohashes[0]
# Convert a binary key into an integer path value
+#
+# Function simply converts from 32-bytes array to
+# one 256-bit value
def key_to_path(k):
return int.from_bytes(k, 'big')
tt256m1 = 2**256 - 1
# And convert back
+#
+# From 256-bit integer to an array of bytes
def path_to_key(k):
return (k & tt256m1).to_bytes(32, 'big')
# Read a key from a given tree
+#
+# Reading of a leaf value includes 2 optimizations
+# 1. If the traversal hits a zero hash then the
+# leaf is known to be zero
+#
+# 2. For subtrees that include a single non-zero leaf
+# the sibling pair is encoded as follows:
+#
+# <1-byte><------32-bytes------><-----32-bytes----->
+#
+# The flag is tested using: len(child) == 65
+#
+# If present, we check if the remaining path
+# (path % 2**256) matches path at child[1:33]
+#
+# * If YES we have a non-zero leaf whose hash is
+# child[33:]
+#
+# * If no, return Zero.
+#
def get(db, root, key):
v = root
path = key_to_path(key)
@@ -58,6 +98,13 @@ def get(db, root, key):
return v
# Make a root hash of a (sub)tree with a single key/value pair
+#
+# Generate the parent hash that sits on top of a sub-tree with
+# a single non-zero leaf. This is optimized by eliminating
+# all database lookups.
+#
+# Hash is computed by recursively queuing hashing calls which
+# are finally executed in reverse order from bottom to top.
def make_single_key_hash(path, depth, value):
if depth == 256:
return value
@@ -67,6 +114,11 @@ def make_single_key_hash(path, depth, value):
return sha3(make_single_key_hash(path << 1, depth + 1, value) + zerohashes[depth+1])
# Make a root hash of a (sub)tree with two key/value pairs, and save intermediate nodes in the DB
+#
+# Adds DB nodes in cases where the subtree includes 2 non-zero leafs.
+#
+# Function shows how the value of a single leaf subtree is encoded
+# with an extra byte.
def make_double_key_hash(db, path1, path2, depth, value1, value2):
if depth == 256:
raise Exception("Cannot fit two values into one slot!")
@@ -97,6 +149,7 @@ def make_double_key_hash(db, path1, path2, depth, value1, value2):
def update(db, root, key, value):
return _update(db, root, key_to_path(key), 0, value)
+# Adds leaf to tree
def _update(db, root, path, depth, value):
if depth == 256:
return value