Mike Gerwitz

Activist for User Freedom

aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMike Gerwitz <mike.gerwitz@ryansg.com>2019-12-10 15:32:25 -0500
committerMike Gerwitz <mike.gerwitz@ryansg.com>2020-02-24 14:56:28 -0500
commit176d099fb61a49492515a0fd35fc19b4f2b70ce9 (patch)
tree3503ce3f2cf27aaf26fcc5cb533b251aa4f96e34
parent0d2bb5de598019bf203ac9e151031f11df98550d (diff)
downloadtame-176d099fb61a49492515a0fd35fc19b4f2b70ce9.tar.gz
tame-176d099fb61a49492515a0fd35fc19b4f2b70ce9.tar.bz2
tame-176d099fb61a49492515a0fd35fc19b4f2b70ce9.zip
tamer::sym: FNV => Fx Hash
For strings of any notable length, Fx Hash outperforms FNV. Rustc also moved to this hash function and noticed performance improvements. Fortunately, as was accounted for in the design, this was a trivial switch. Here are some benchmarks to back up that claim: test hash_set::fnv::with_all_new_1000 ... bench: 133,096 ns/iter (+/- 1,430) test hash_set::fnv::with_all_new_1000_with_capacity ... bench: 82,591 ns/iter (+/- 592) test hash_set::fnv::with_all_new_rc_str_1000_baseline ... bench: 162,073 ns/iter (+/- 1,277) test hash_set::fnv::with_one_new_1000 ... bench: 37,334 ns/iter (+/- 256) test hash_set::fnv::with_one_new_rc_str_1000_baseline ... bench: 18,263 ns/iter (+/- 261) test hash_set::fx::with_all_new_1000 ... bench: 85,217 ns/iter (+/- 1,111) test hash_set::fx::with_all_new_1000_with_capacity ... bench: 59,383 ns/iter (+/- 752) test hash_set::fx::with_all_new_rc_str_1000_baseline ... bench: 98,802 ns/iter (+/- 1,117) test hash_set::fx::with_one_new_1000 ... bench: 42,484 ns/iter (+/- 1,239) test hash_set::fx::with_one_new_rc_str_1000_baseline ... bench: 15,000 ns/iter (+/- 233) test hash_set::with_all_new_1000 ... bench: 137,645 ns/iter (+/- 1,186) test hash_set::with_all_new_rc_str_1000_baseline ... bench: 163,129 ns/iter (+/- 1,725) test hash_set::with_one_new_1000 ... bench: 59,051 ns/iter (+/- 1,202) test hash_set::with_one_new_rc_str_1000_baseline ... bench: 37,986 ns/iter (+/- 771)
-rw-r--r--tamer/Cargo.lock17
-rw-r--r--tamer/Cargo.toml2
-rw-r--r--tamer/benches/sym.rs36
-rw-r--r--tamer/src/sym.rs92
4 files changed, 87 insertions, 60 deletions
diff --git a/tamer/Cargo.lock b/tamer/Cargo.lock
index 9340d78..d82d96f 100644
--- a/tamer/Cargo.lock
+++ b/tamer/Cargo.lock
@@ -1,14 +1,22 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
[[package]]
+name = "byteorder"
+version = "1.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
name = "fixedbitset"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
-name = "fnv"
-version = "1.0.6"
+name = "fxhash"
+version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
+]
[[package]]
name = "memchr"
@@ -42,14 +50,15 @@ name = "tamer"
version = "0.0.0"
dependencies = [
"fixedbitset 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
- "fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
+ "fxhash 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"petgraph 0.4.13 (registry+https://github.com/rust-lang/crates.io-index)",
"quick-xml 0.17.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[metadata]
+"checksum byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5"
"checksum fixedbitset 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "86d4de0081402f5e88cdac65c8dcdcc73118c1a7a465e2a05f0da05843a8ea33"
-"checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3"
+"checksum fxhash 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
"checksum memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "88579771288728879b57485cc7d6b07d648c9f0141eb955f8ab7f9d45394468e"
"checksum ordermap 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "a86ed3f5f244b372d6b1a00b72ef7f8876d0bc6a78a4c9985c53614041512063"
"checksum petgraph 0.4.13 (registry+https://github.com/rust-lang/crates.io-index)" = "9c3659d1ee90221741f65dd128d9998311b0e40c5d3c23a62445938214abce4f"
diff --git a/tamer/Cargo.toml b/tamer/Cargo.toml
index 82cfaf5..a758e5d 100644
--- a/tamer/Cargo.toml
+++ b/tamer/Cargo.toml
@@ -23,7 +23,7 @@ lto = true
lto = true
[dependencies]
-fnv = ">= 1.0.3"
+fxhash = ">= 0.2.1"
petgraph = ">= 0.4.13"
quick-xml = ">= 0.17.0"
# used by petgraph
diff --git a/tamer/benches/sym.rs b/tamer/benches/sym.rs
index f601bbe..9031f11 100644
--- a/tamer/benches/sym.rs
+++ b/tamer/benches/sym.rs
@@ -159,16 +159,16 @@ mod hash_set {
});
}
- mod fnv {
+ mod fx {
use super::*;
- use ::fnv::FnvBuildHasher;
+ use fxhash::FxBuildHasher;
/// This is our baseline with a raw Rc<str>.
#[bench]
fn with_all_new_rc_str_1000_baseline(bench: &mut Bencher) {
let strs = gen_strs(1000);
bench.iter(|| {
- let mut sut = HashSetSut::<FnvBuildHasher>::new();
+ let mut sut = HashSetSut::<FxBuildHasher>::new();
strs.iter().map(|s| sut.intern(&s)).for_each(drop);
});
}
@@ -178,8 +178,7 @@ mod hash_set {
let strs = gen_strs(1000);
bench.iter(|| {
- let mut sut =
- HashSetInterner::<SymbolRc, FnvBuildHasher>::new();
+ let mut sut = HashSetInterner::<SymbolRc, FxBuildHasher>::new();
strs.iter().map(|s| sut.intern(&s)).for_each(drop);
});
}
@@ -188,7 +187,7 @@ mod hash_set {
/// This is our baseline with a raw Rc<str>.
fn with_one_new_rc_str_1000_baseline(bench: &mut Bencher) {
bench.iter(|| {
- let mut sut: HashSetSut<FnvBuildHasher> = HashSetSut {
+ let mut sut: HashSetSut<FxBuildHasher> = HashSetSut {
map: HashSet::with_hasher(Default::default()),
};
(0..1000).map(|_| sut.intern("first")).for_each(drop);
@@ -198,13 +197,12 @@ mod hash_set {
#[bench]
fn with_one_new_1000(bench: &mut Bencher) {
bench.iter(|| {
- let mut sut =
- HashSetInterner::<SymbolRc, FnvBuildHasher>::new();
+ let mut sut = HashSetInterner::<SymbolRc, FxBuildHasher>::new();
(0..1000).map(|_| sut.intern("first")).for_each(drop);
});
}
- /// Since FNV is the best-performing, let's build upon it to demonstrate
+ /// Since Fx is the best-performing, let's build upon it to demonstrate
/// the benefits of with_capacity
#[bench]
fn with_all_new_1000_with_capacity(bench: &mut Bencher) {
@@ -213,7 +211,7 @@ mod hash_set {
bench.iter(|| {
let mut sut =
- HashSetInterner::<SymbolRc, FnvBuildHasher>::with_capacity(
+ HashSetInterner::<SymbolRc, FxBuildHasher>::with_capacity(
n,
);
strs.iter().map(|s| sut.intern(&s)).for_each(drop);
@@ -298,16 +296,16 @@ mod hash_map {
});
}
- mod fnv {
+ mod fx {
use super::*;
- use ::fnv::FnvBuildHasher;
+ use fxhash::FxBuildHasher;
/// This is our baseline with a raw Rc<str>.
#[bench]
fn with_all_new_rc_str_1000_baseline(bench: &mut Bencher) {
let strs = gen_strs(1000);
bench.iter(|| {
- let mut sut = HashMapSut::<(), FnvBuildHasher>::new();
+ let mut sut = HashMapSut::<(), FxBuildHasher>::new();
strs.iter().map(|s| sut.intern(&s)).for_each(drop);
});
}
@@ -318,7 +316,7 @@ mod hash_map {
bench.iter(|| {
let mut sut =
- HashMapInterner::<SymbolRc, (), FnvBuildHasher>::new();
+ HashMapInterner::<SymbolRc, (), FxBuildHasher>::new();
strs.iter().map(|s| sut.intern(&s)).for_each(drop);
});
}
@@ -327,7 +325,7 @@ mod hash_map {
/// This is our baseline with a raw Rc<str>.
fn with_one_new_rc_str_1000_baseline(bench: &mut Bencher) {
bench.iter(|| {
- let mut sut: HashMapSut<(), FnvBuildHasher> = HashMapSut {
+ let mut sut: HashMapSut<(), FxBuildHasher> = HashMapSut {
map: HashMap::with_hasher(Default::default()),
};
(0..1000).map(|_| sut.intern("first")).for_each(drop);
@@ -338,7 +336,7 @@ mod hash_map {
fn with_one_new_1000(bench: &mut Bencher) {
bench.iter(|| {
let mut sut =
- HashMapInterner::<SymbolRc, (), FnvBuildHasher>::new();
+ HashMapInterner::<SymbolRc, (), FxBuildHasher>::new();
(0..1000).map(|_| sut.intern("first")).for_each(drop);
});
}
@@ -352,7 +350,7 @@ mod hash_map {
bench.iter(|| {
let mut sut =
- HashMapInterner::<SymbolRc, (), FnvBuildHasher>::with_capacity(n);
+ HashMapInterner::<SymbolRc, (), FxBuildHasher>::with_capacity(n);
strs.iter().map(|s| sut.intern(&s)).for_each(drop);
});
}
@@ -363,7 +361,7 @@ mod hash_map {
bench.iter(|| {
let mut sut =
- HashMapInterner::<SymbolRc, u8, FnvBuildHasher>::new();
+ HashMapInterner::<SymbolRc, u8, FxBuildHasher>::new();
strs.iter().map(|s| sut.intern_meta(&s, 0)).for_each(drop);
});
}
@@ -374,7 +372,7 @@ mod hash_map {
bench.iter(|| {
let mut sut =
- HashMapInterner::<SymbolRc, u8, FnvBuildHasher>::new();
+ HashMapInterner::<SymbolRc, u8, FxBuildHasher>::new();
strs.iter()
.map(|s| {
sut.intern(&s);
diff --git a/tamer/src/sym.rs b/tamer/src/sym.rs
index 47d0e08..6218811 100644
--- a/tamer/src/sym.rs
+++ b/tamer/src/sym.rs
@@ -30,14 +30,14 @@
//! - [`HashSetInterner`] - Intern pool backed by a [`HashSet`].
//! - [`DefaultSetInterner`] - The currently recommended intern pool
//! configuration for symbol interning without metadata.
-//! - [`FnvHashSetInterner`] - Intern pool backed by a [`HashSet`] using the
-//! Fowler-Noll-Vo hashing algorithm.
+//! - [`FxHashSetInterner`] - Intern pool backed by a [`HashSet`] using the
+//! [Fx Hash][fxhash] hashing algorithm.
//! - [`HashMapInterner`] - Intern pool backed by a [`HashMap`] to provide
//! associated metadata.
//! - [`DefaultMapInterner`] - The currently recommended intern pool
//! configuration for symbol interning with metadata.
-//! - [`FnvHashMapInterner`] - Intern pool backed by a [`HashMap`] using
-//! the Fowler-Noll-Vo hashing algorithm.
+//! - [`FxHashMapInterner`] - Intern pool backed by a [`HashMap`] using
+//! the [Fx Hash][fxhash] hashing algorithm.
//!
//! When basic interning is required,
//! use a set-based interner.
@@ -297,12 +297,42 @@
//! for Rust developed by Mozilla for Servo.
//! - [`string-interner`][rust-string-interner] is another string
//! interning library for Rust.
+//! - [Rustc interns strings as `Symbol`s][rustc-intern] using an
+//! [arena allocator][rustc-arena] and avoids `Rc` by representing
+//! symbols as integer values and converting them to strings using a
+//! global pool and unsafe rust to cast to a `static` slice.
+//! - This was the direction TAMER was initially going to take,
+//! but the simplicity of `Rc` was chosen over unsafe rust until
+//! such an optimization is needed and its benefits demonstrable.
+//! - Rustc identifies symbols by integer value encapsulated within a
+//! `Symbol`.
+//! TAMER does this at the level of the ASG instead,
+//! treating the interner as a separate system.
//!
//! [flyweight pattern]: https://en.wikipedia.org/wiki/Flyweight_pattern
//! [rust-string-cache]: https://github.com/servo/string-cache
//! [rust-string-interner]: https://github.com/robbepop/string-interner
-
-use ::fnv::FnvBuildHasher;
+//! [rustc-intern]: https://doc.rust-lang.org/nightly/nightly-rustc/syntax/ast/struct.Name.html
+//! [rustc-arena]: https://doc.rust-lang.org/nightly/nightly-rustc/arena/index.html
+//!
+//! The hash function chosen for this module is [Fx Hash][fxhash].
+//!
+//! - Rustc previously used the [Fowler-Noll-Vo (FNV)][fnv] hash
+//! function,
+//! but [now uses Fx Hash][rustc-fx].
+//! This was extracted into the [`fxhash`][fxhash] crate,
+//! which is used by TAMER.
+//! - TAMER originally used FNV,
+//! but benchmarks showed that Fx Hash was more performant.
+//! - Benchmarks for other hash functions,
+//! including FNV,
+//! can be found at the [`hash-rs`][hash-rs] project.
+//!
+//! [fnv]: https://doc.servo.org/fnv/
+//! [rustc-fx]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_data_structures/fx/index.html
+//! [hash-rs]: https://github.com/Gankra/hash-rs
+
+use fxhash::FxBuildHasher;
use std::collections::hash_map::{Entry, RandomState};
use std::collections::{HashMap, HashSet};
use std::hash::{BuildHasher, Hash};
@@ -617,32 +647,27 @@ where
}
}
-/// Interner using the [Fowler-Noll-Vo][fnv] (FNV) hashing function.
+/// Interner using the [Fx Hash][fxhash] hashing function.
///
/// _This is currently the hash function used by [`DefaultSetInterner`]._
///
-/// If the intern pool will contains strings primarily of length ≤ 60,
-/// and if denial of service is not a concern,
-/// then use of the FNV hash function will outperform the default
+/// If denial of service is not a concern,
+/// then this will outperform the default
/// [`DefaultHasher`](std::collections::hash_map::DefaultHasher)
/// (which uses SipHash at the time of writing).
-/// See intern benchmarks for a comparison.
-/// For a comparison of various hashing algorithms,
-/// see the [`hash-rs`][hash-rs] project.
///
-/// [hash-rs]: https://github.com/Gankra/hash-rs
-pub type FnvHashSetInterner<T> = HashSetInterner<T, FnvBuildHasher>;
+/// See intern benchmarks for a comparison.
+pub type FxHashSetInterner<T> = HashSetInterner<T, FxBuildHasher>;
/// Recommended configuration for set-based interners.
///
-/// The choice of this default relies on certain assumptions:
-/// - Denial-of-service attacks against the hash function are not a
-/// concern; and
-/// - Strings to be interned are mostly ≤ 60 characters.
+/// The choice of this default relies on the assumption that
+/// denial-of-service attacks against the hash function are not a
+/// concern.
///
/// For more information on the hashing algorithm,
-/// see [`FnvHashSetInterner`].
-pub type DefaultSetInterner = FnvHashSetInterner<SymbolRc>;
+/// see [`FxHashSetInterner`].
+pub type DefaultSetInterner = FxHashSetInterner<SymbolRc>;
/// An interner backed by a [`HashMap`].
///
@@ -749,32 +774,27 @@ where
}
}
-/// Map interner using the [Fowler-Noll-Vo][fnv] (FNV) hashing function.
+/// Interner using the [Fx Hash][fxhash] hashing function.
///
/// _This is currently the hash function used by [`DefaultMapInterner`]._
///
-/// If the intern pool will contains strings primarily of length ≤ 60,
-/// and if denial of service is not a concern,
-/// then use of the FNV hash function will outperform the default
+/// If denial of service is not a concern,
+/// then this will outperform the default
/// [`DefaultHasher`](std::collections::hash_map::DefaultHasher)
/// (which uses SipHash at the time of writing).
-/// See intern benchmarks for a comparison.
-/// For a comparison of various hashing algorithms,
-/// see the [`hash-rs`][hash-rs] project.
///
-/// [hash-rs]: https://github.com/Gankra/hash-rs
-pub type FnvHashMapInterner<T, M> = HashMapInterner<T, M, FnvBuildHasher>;
+/// See intern benchmarks for a comparison.
+pub type FxHashMapInterner<T, M> = HashMapInterner<T, M, FxBuildHasher>;
/// Recommended configuration for map-based interners.
///
-/// The choice of this default relies on certain assumptions:
-/// - Denial-of-service attacks against the hash function are not a
-/// concern; and
-/// - Strings to be interned are mostly ≤ 60 characters.
+/// The choice of this default relies on the assumption that
+/// denial-of-service attacks against the hash function are not a
+/// concern.
///
/// For more information on the hashing algorithm,
-/// see [`FnvHashMapInterner`].
-pub type DefaultMapInterner<M> = FnvHashMapInterner<SymbolRc, M>;
+/// see [`FxHashMapInterner`].
+pub type DefaultMapInterner<M> = FxHashMapInterner<SymbolRc, M>;
#[cfg(test)]
mod test {