Skip to content

Commit

Permalink
[GR-30782] GSoC: Use a compact hash table for RubyHash instead of the…
Browse files Browse the repository at this point in the history
… buckets strategy (#3172)

PullRequest: truffleruby/4078
  • Loading branch information
eregon committed Dec 13, 2023
2 parents 58f96a4 + 07e1d83 commit ca74b40
Show file tree
Hide file tree
Showing 23 changed files with 981 additions and 191 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ Compatibility:

Performance:

* Change the `Hash` representation from traditional buckets to a "compact hash table" for improved locality, performance and memory footprint (#3172, @moste00).
* Optimize calls with `ruby2_keywords` forwarding by deciding it per call site instead of per callee thanks to [my fix in CRuby 3.2](https://bugs.ruby-lang.org/issues/18625) (@eregon).
* Optimize feature loading when require is called with an absolute path to a .rb file (@rwstauner).

Expand Down
23 changes: 23 additions & 0 deletions bench/micro/hash/buckets-lookup.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# truffleruby_primitives: true

# Copyright (c) 2023 Oracle and/or its affiliates. All rights reserved. This
# code is released under a tri EPL/GPL/LGPL license. You can use it,
# redistribute it and/or modify it under the terms of the:
#
# Eclipse Public License version 2.0, or
# GNU General Public License version 2, or
# GNU Lesser General Public License version 2.1.

# Benchmarks looking up keys

max = 400_000 # > 0.75*(524288 + 21) (cf. BucketsHashStore)
hash = { a: 1, b: 2, c: 3, d: 4 } # big enough to start as a bucket hash
max.times { |i|
hash[i] = i
}

benchmark 'core-hash-buckets-lookup' do
1000.times do |i|
Primitive.blackhole(hash[i])
end
end
2 changes: 1 addition & 1 deletion bench/micro/hash/each-buckets.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@
if RUBY_ENGINE == 'truffleruby'
hash = { a: 1, b: 2, c: 3, d: 4, e: 5, f: 6, g: 7, h: 8, i: 9, j: 10 }
benchmark 'core-hash-each-buckets' do
hash.each { |k, v| Primitive.blackhole(v) }
hash.each { |k, v| Primitive.blackhole(k); Primitive.blackhole(v) }
end
end
18 changes: 16 additions & 2 deletions spec/ruby/core/hash/delete_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,21 +24,35 @@
it "allows removing a key while iterating" do
h = { a: 1, b: 2 }
visited = []
h.each_pair { |k,v|
h.each_pair { |k, v|
visited << k
h.delete(k)
}
visited.should == [:a, :b]
h.should == {}
end

it "allows removing a key while iterating for big hashes" do
h = { a: 1, b: 2, c: 3, d: 4, e: 5, f: 6, g: 7, h: 8, i: 9, j: 10,
k: 11, l: 12, m: 13, n: 14, o: 15, p: 16, q: 17, r: 18, s: 19, t: 20,
u: 21, v: 22, w: 23, x: 24, y: 25, z: 26 }
visited = []
h.each_pair { |k, v|
visited << k
h.delete(k)
}
visited.should == [:a, :b, :c, :d, :e, :f, :g, :h, :i, :j, :k, :l, :m,
:n, :o, :p, :q, :r, :s, :t, :u, :v, :w, :x, :y, :z]
h.should == {}
end

it "accepts keys with private #hash method" do
key = HashSpecs::KeyWithPrivateHash.new
{ key => 5 }.delete(key).should == 5
end

it "raises a FrozenError if called on a frozen instance" do
-> { HashSpecs.frozen_hash.delete("foo") }.should raise_error(FrozenError)
-> { HashSpecs.frozen_hash.delete("foo") }.should raise_error(FrozenError)
-> { HashSpecs.empty_frozen_hash.delete("foo") }.should raise_error(FrozenError)
end
end
30 changes: 30 additions & 0 deletions spec/ruby/core/hash/rehash_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,36 @@ def k1.hash; 1; end
h.keys.should_not.include? [1]
end

it "iterates keys in insertion order" do
key = Class.new do
attr_reader :name

def initialize(name)
@name = name
end

def hash
123
end
end

a, b, c, d = key.new('a'), key.new('b'), key.new('c'), key.new('d')
h = { a => 1, b => 2, c => 3, d => 4 }
h.size.should == 4

key.class_exec do
def eql?(other)
true
end
end

h.rehash
h.size.should == 1
k, v = h.first
k.name.should == 'a'
v.should == 4
end

it "raises a FrozenError if called on a frozen instance" do
-> { HashSpecs.frozen_hash.rehash }.should raise_error(FrozenError)
-> { HashSpecs.empty_frozen_hash.rehash }.should raise_error(FrozenError)
Expand Down
36 changes: 18 additions & 18 deletions src/main/java/org/truffleruby/core/array/ArrayNodes.java
Original file line number Diff line number Diff line change
Expand Up @@ -909,46 +909,46 @@ Object equalNotArray(RubyArray a, Object b) {
@ImportStatic(ArrayGuards.class)
public abstract static class EqlNode extends PrimitiveArrayArgumentsNode {

@Child private SameOrEqlNode eqlNode = SameOrEqlNode.create();

@Specialization(
guards = { "stores.accepts(bStore)", "stores.isPrimitive(aStore)" },
limit = "storageStrategyLimit()")
boolean eqlSamePrimitiveType(RubyArray a, RubyArray b,
static boolean eqlSamePrimitiveType(RubyArray a, RubyArray b,
@Bind("a.getStore()") Object aStore,
@Bind("b.getStore()") Object bStore,
@CachedLibrary("aStore") ArrayStoreLibrary stores,
@Cached ConditionProfile sameProfile,
@Cached IntValueProfile arraySizeProfile,
@Cached ConditionProfile sameSizeProfile,
@Cached BranchProfile trueProfile,
@Cached BranchProfile falseProfile,
@Cached LoopConditionProfile loopProfile) {
@Cached SameOrEqlNode eqlNode,
@Cached InlinedConditionProfile sameProfile,
@Cached InlinedIntValueProfile arraySizeProfile,
@Cached InlinedConditionProfile sameSizeProfile,
@Cached InlinedBranchProfile trueProfile,
@Cached InlinedBranchProfile falseProfile,
@Cached InlinedLoopConditionProfile loopProfile,
@Bind("$node") Node node) {

if (sameProfile.profile(a == b)) {
if (sameProfile.profile(node, a == b)) {
return true;
}

final int size = arraySizeProfile.profile(a.size);
final int size = arraySizeProfile.profile(node, a.size);

if (!sameSizeProfile.profile(size == b.size)) {
if (!sameSizeProfile.profile(node, size == b.size)) {
return false;
}

int i = 0;
try {
for (; loopProfile.inject(i < size); i++) {
if (!eqlNode.execute(stores.read(aStore, i), stores.read(bStore, i))) {
falseProfile.enter();
for (; loopProfile.inject(node, i < size); i++) {
if (!eqlNode.execute(node, stores.read(aStore, i), stores.read(bStore, i))) {
falseProfile.enter(node);
return false;
}
TruffleSafepoint.poll(this);
TruffleSafepoint.poll(node);
}
} finally {
profileAndReportLoopCount(loopProfile, i);
profileAndReportLoopCount(node, loopProfile, i);
}

trueProfile.enter();
trueProfile.enter(node);
return true;
}

Expand Down
46 changes: 34 additions & 12 deletions src/main/java/org/truffleruby/core/hash/CompareHashKeysNode.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,23 @@
*/
package org.truffleruby.core.hash;

import com.oracle.truffle.api.dsl.GenerateCached;
import com.oracle.truffle.api.dsl.GenerateInline;
import org.truffleruby.core.basicobject.ReferenceEqualNode;
import org.truffleruby.core.kernel.KernelNodes.SameOrEqlNode;
import org.truffleruby.language.RubyBaseNode;

import com.oracle.truffle.api.dsl.Cached;
import com.oracle.truffle.api.dsl.GenerateUncached;
import com.oracle.truffle.api.dsl.Specialization;
import com.oracle.truffle.api.nodes.Node;
import org.truffleruby.core.basicobject.ReferenceEqualNode;
import org.truffleruby.core.kernel.KernelNodes.SameOrEqlNode;
import org.truffleruby.language.RubyBaseNode;

@GenerateInline
@GenerateCached(false)
@GenerateUncached
public abstract class CompareHashKeysNode extends RubyBaseNode {

public static CompareHashKeysNode getUncached() {
return CompareHashKeysNodeGen.getUncached();
}

public abstract boolean execute(boolean compareByIdentity, Object key, int hashed,
public abstract boolean execute(Node node, boolean compareByIdentity, Object key, int hashed,
Object otherKey, int otherHashed);

/** Checks if the two keys are the same object, which is used by both modes (by identity or not) of lookup. Enables
Expand All @@ -37,14 +38,35 @@ public static boolean referenceEqualKeys(Node node, ReferenceEqualNode refEqual,
}

@Specialization(guards = "compareByIdentity")
boolean refEquals(boolean compareByIdentity, Object key, int hashed, Object otherKey, int otherHashed,
static boolean refEquals(
Node node, boolean compareByIdentity, Object key, int hashed, Object otherKey, int otherHashed,
@Cached ReferenceEqualNode refEqual) {
return refEqual.execute(this, key, otherKey);
return refEqual.execute(node, key, otherKey);
}

@Specialization(guards = "!compareByIdentity")
boolean same(boolean compareByIdentity, Object key, int hashed, Object otherKey, int otherHashed,
static boolean same(Node node, boolean compareByIdentity, Object key, int hashed, Object otherKey, int otherHashed,
@Cached SameOrEqlNode same) {
return hashed == otherHashed && same.execute(key, otherKey);
return hashed == otherHashed && same.execute(node, key, otherKey);
}

@GenerateInline
@GenerateCached(false)
@GenerateUncached
public abstract static class AssumingEqualHashes extends RubyBaseNode {

public abstract boolean execute(Node node, boolean compareByIdentity, Object key, Object otherKey);

@Specialization(guards = "compareByIdentity")
static boolean refEquals(Node node, boolean compareByIdentity, Object key, Object otherKey,
@Cached ReferenceEqualNode refEqual) {
return refEqual.execute(node, key, otherKey);
}

@Specialization(guards = "!compareByIdentity")
static boolean same(Node node, boolean compareByIdentity, Object key, Object otherKey,
@Cached SameOrEqlNode same) {
return same.execute(node, key, otherKey);
}
}
}
12 changes: 10 additions & 2 deletions src/main/java/org/truffleruby/core/hash/HashLiteralNode.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
*/
package org.truffleruby.core.hash;

import org.truffleruby.RubyLanguage;
import org.truffleruby.core.hash.library.BucketsHashStore;
import org.truffleruby.core.hash.library.CompactHashStore;
import org.truffleruby.core.hash.library.EmptyHashStore;
import org.truffleruby.core.hash.library.PackedHashStoreLibrary;
import org.truffleruby.core.hash.library.PackedHashStoreLibraryFactory;
Expand All @@ -28,13 +30,19 @@ protected HashLiteralNode(RubyNode[] keyValues) {
this.keyValues = keyValues;
}

public static HashLiteralNode create(RubyNode[] keyValues) {
protected int getNumberOfEntries() {
return keyValues.length >> 1;
}

public static HashLiteralNode create(RubyNode[] keyValues, RubyLanguage language) {
if (keyValues.length == 0) {
return new EmptyHashStore.EmptyHashLiteralNode();
} else if (keyValues.length <= PackedHashStoreLibrary.MAX_ENTRIES * 2) {
return PackedHashStoreLibraryFactory.SmallHashLiteralNodeGen.create(keyValues);
} else {
return new BucketsHashStore.GenericHashLiteralNode(keyValues);
return language.options.BIG_HASH_STRATEGY_IS_BUCKETS
? new BucketsHashStore.BucketHashLiteralNode(keyValues)
: new CompactHashStore.CompactHashLiteralNode(keyValues);
}
}

Expand Down
29 changes: 16 additions & 13 deletions src/main/java/org/truffleruby/core/hash/RubyHash.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,20 @@

import java.util.Set;

import org.truffleruby.RubyContext;
import org.truffleruby.collections.PEBiFunction;
import org.truffleruby.core.hash.library.BucketsHashStore;
import org.truffleruby.core.hash.library.CompactHashStore;
import org.truffleruby.core.hash.library.HashStoreLibrary;
import org.truffleruby.core.klass.RubyClass;
import org.truffleruby.interop.ForeignToRubyNode;
import org.truffleruby.language.Nil;
import org.truffleruby.language.RubyDynamicObject;
import org.truffleruby.language.dispatch.DispatchNode;
import org.truffleruby.language.objects.IsFrozenNode;
import org.truffleruby.language.objects.ObjectGraph;
import org.truffleruby.language.objects.ObjectGraphNode;

import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
import com.oracle.truffle.api.dsl.Bind;
import com.oracle.truffle.api.dsl.Cached;
Expand All @@ -26,20 +40,7 @@
import com.oracle.truffle.api.library.ExportMessage;
import com.oracle.truffle.api.nodes.Node;
import com.oracle.truffle.api.object.Shape;

import com.oracle.truffle.api.profiles.InlinedConditionProfile;
import org.truffleruby.RubyContext;
import org.truffleruby.collections.PEBiFunction;
import org.truffleruby.core.hash.library.BucketsHashStore;
import org.truffleruby.core.hash.library.HashStoreLibrary;
import org.truffleruby.core.klass.RubyClass;
import org.truffleruby.interop.ForeignToRubyNode;
import org.truffleruby.language.Nil;
import org.truffleruby.language.RubyDynamicObject;
import org.truffleruby.language.dispatch.DispatchNode;
import org.truffleruby.language.objects.IsFrozenNode;
import org.truffleruby.language.objects.ObjectGraph;
import org.truffleruby.language.objects.ObjectGraphNode;

@ExportLibrary(InteropLibrary.class)
@ImportStatic(HashGuards.class)
Expand Down Expand Up @@ -86,6 +87,8 @@ public String toString() {
public void getAdjacentObjects(Set<Object> reachable) {
if (store instanceof BucketsHashStore) {
((BucketsHashStore) store).getAdjacentObjects(reachable);
} else if (store instanceof CompactHashStore) {
((CompactHashStore) store).getAdjacentObjects(reachable);
} else {
ObjectGraph.addProperty(reachable, store);
}
Expand Down
Loading

0 comments on commit ca74b40

Please sign in to comment.