Mike Gerwitz

Activist for User Freedom

aboutsummaryrefslogtreecommitdiffstats
path: root/tamer
diff options
context:
space:
mode:
Diffstat (limited to 'tamer')
-rw-r--r--tamer/Cargo.lock17
-rw-r--r--tamer/Cargo.toml2
-rw-r--r--tamer/benches/sym.rs36
-rw-r--r--tamer/src/sym.rs92
4 files changed, 87 insertions, 60 deletions
diff --git a/tamer/Cargo.lock b/tamer/Cargo.lock
index 9340d78..d82d96f 100644
--- a/tamer/Cargo.lock
+++ b/tamer/Cargo.lock
@@ -1,14 +1,22 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
[[package]]
+name = "byteorder"
+version = "1.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
name = "fixedbitset"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
-name = "fnv"
-version = "1.0.6"
+name = "fxhash"
+version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
+]
[[package]]
name = "memchr"
@@ -42,14 +50,15 @@ name = "tamer"
version = "0.0.0"
dependencies = [
"fixedbitset 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
- "fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
+ "fxhash 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"petgraph 0.4.13 (registry+https://github.com/rust-lang/crates.io-index)",
"quick-xml 0.17.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[metadata]
+"checksum byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5"
"checksum fixedbitset 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "86d4de0081402f5e88cdac65c8dcdcc73118c1a7a465e2a05f0da05843a8ea33"
-"checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3"
+"checksum fxhash 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
"checksum memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "88579771288728879b57485cc7d6b07d648c9f0141eb955f8ab7f9d45394468e"
"checksum ordermap 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "a86ed3f5f244b372d6b1a00b72ef7f8876d0bc6a78a4c9985c53614041512063"
"checksum petgraph 0.4.13 (registry+https://github.com/rust-lang/crates.io-index)" = "9c3659d1ee90221741f65dd128d9998311b0e40c5d3c23a62445938214abce4f"
diff --git a/tamer/Cargo.toml b/tamer/Cargo.toml
index 82cfaf5..a758e5d 100644
--- a/tamer/Cargo.toml
+++ b/tamer/Cargo.toml
@@ -23,7 +23,7 @@ lto = true
lto = true
[dependencies]
-fnv = ">= 1.0.3"
+fxhash = ">= 0.2.1"
petgraph = ">= 0.4.13"
quick-xml = ">= 0.17.0"
# used by petgraph
diff --git a/tamer/benches/sym.rs b/tamer/benches/sym.rs
index f601bbe..9031f11 100644
--- a/tamer/benches/sym.rs
+++ b/tamer/benches/sym.rs
@@ -159,16 +159,16 @@ mod hash_set {
});
}
- mod fnv {
+ mod fx {
use super::*;
- use ::fnv::FnvBuildHasher;
+ use fxhash::FxBuildHasher;
/// This is our baseline with a raw Rc<str>.
#[bench]
fn with_all_new_rc_str_1000_baseline(bench: &mut Bencher) {
let strs = gen_strs(1000);
bench.iter(|| {
- let mut sut = HashSetSut::<FnvBuildHasher>::new();
+ let mut sut = HashSetSut::<FxBuildHasher>::new();
strs.iter().map(|s| sut.intern(&s)).for_each(drop);
});
}
@@ -178,8 +178,7 @@ mod hash_set {
let strs = gen_strs(1000);
bench.iter(|| {
- let mut sut =
- HashSetInterner::<SymbolRc, FnvBuildHasher>::new();
+ let mut sut = HashSetInterner::<SymbolRc, FxBuildHasher>::new();
strs.iter().map(|s| sut.intern(&s)).for_each(drop);
});
}
@@ -188,7 +187,7 @@ mod hash_set {
/// This is our baseline with a raw Rc<str>.
fn with_one_new_rc_str_1000_baseline(bench: &mut Bencher) {
bench.iter(|| {
- let mut sut: HashSetSut<FnvBuildHasher> = HashSetSut {
+ let mut sut: HashSetSut<FxBuildHasher> = HashSetSut {
map: HashSet::with_hasher(Default::default()),
};
(0..1000).map(|_| sut.intern("first")).for_each(drop);
@@ -198,13 +197,12 @@ mod hash_set {
#[bench]
fn with_one_new_1000(bench: &mut Bencher) {
bench.iter(|| {
- let mut sut =
- HashSetInterner::<SymbolRc, FnvBuildHasher>::new();
+ let mut sut = HashSetInterner::<SymbolRc, FxBuildHasher>::new();
(0..1000).map(|_| sut.intern("first")).for_each(drop);
});
}
- /// Since FNV is the best-performing, let's build upon it to demonstrate
+ /// Since Fx is the best-performing, let's build upon it to demonstrate
/// the benefits of with_capacity
#[bench]
fn with_all_new_1000_with_capacity(bench: &mut Bencher) {
@@ -213,7 +211,7 @@ mod hash_set {
bench.iter(|| {
let mut sut =
- HashSetInterner::<SymbolRc, FnvBuildHasher>::with_capacity(
+ HashSetInterner::<SymbolRc, FxBuildHasher>::with_capacity(
n,
);
strs.iter().map(|s| sut.intern(&s)).for_each(drop);
@@ -298,16 +296,16 @@ mod hash_map {
});
}
- mod fnv {
+ mod fx {
use super::*;
- use ::fnv::FnvBuildHasher;
+ use fxhash::FxBuildHasher;
/// This is our baseline with a raw Rc<str>.
#[bench]
fn with_all_new_rc_str_1000_baseline(bench: &mut Bencher) {
let strs = gen_strs(1000);
bench.iter(|| {
- let mut sut = HashMapSut::<(), FnvBuildHasher>::new();
+ let mut sut = HashMapSut::<(), FxBuildHasher>::new();
strs.iter().map(|s| sut.intern(&s)).for_each(drop);
});
}
@@ -318,7 +316,7 @@ mod hash_map {
bench.iter(|| {
let mut sut =
- HashMapInterner::<SymbolRc, (), FnvBuildHasher>::new();
+ HashMapInterner::<SymbolRc, (), FxBuildHasher>::new();
strs.iter().map(|s| sut.intern(&s)).for_each(drop);
});
}
@@ -327,7 +325,7 @@ mod hash_map {
/// This is our baseline with a raw Rc<str>.
fn with_one_new_rc_str_1000_baseline(bench: &mut Bencher) {
bench.iter(|| {
- let mut sut: HashMapSut<(), FnvBuildHasher> = HashMapSut {
+ let mut sut: HashMapSut<(), FxBuildHasher> = HashMapSut {
map: HashMap::with_hasher(Default::default()),
};
(0..1000).map(|_| sut.intern("first")).for_each(drop);
@@ -338,7 +336,7 @@ mod hash_map {
fn with_one_new_1000(bench: &mut Bencher) {
bench.iter(|| {
let mut sut =
- HashMapInterner::<SymbolRc, (), FnvBuildHasher>::new();
+ HashMapInterner::<SymbolRc, (), FxBuildHasher>::new();
(0..1000).map(|_| sut.intern("first")).for_each(drop);
});
}
@@ -352,7 +350,7 @@ mod hash_map {
bench.iter(|| {
let mut sut =
- HashMapInterner::<SymbolRc, (), FnvBuildHasher>::with_capacity(n);
+ HashMapInterner::<SymbolRc, (), FxBuildHasher>::with_capacity(n);
strs.iter().map(|s| sut.intern(&s)).for_each(drop);
});
}
@@ -363,7 +361,7 @@ mod hash_map {
bench.iter(|| {
let mut sut =
- HashMapInterner::<SymbolRc, u8, FnvBuildHasher>::new();
+ HashMapInterner::<SymbolRc, u8, FxBuildHasher>::new();
strs.iter().map(|s| sut.intern_meta(&s, 0)).for_each(drop);
});
}
@@ -374,7 +372,7 @@ mod hash_map {
bench.iter(|| {
let mut sut =
- HashMapInterner::<SymbolRc, u8, FnvBuildHasher>::new();
+ HashMapInterner::<SymbolRc, u8, FxBuildHasher>::new();
strs.iter()
.map(|s| {
sut.intern(&s);
diff --git a/tamer/src/sym.rs b/tamer/src/sym.rs
index 47d0e08..6218811 100644
--- a/tamer/src/sym.rs
+++ b/tamer/src/sym.rs
@@ -30,14 +30,14 @@
//! - [`HashSetInterner`] - Intern pool backed by a [`HashSet`].
//! - [`DefaultSetInterner`] - The currently recommended intern pool
//! configuration for symbol interning without metadata.
-//! - [`FnvHashSetInterner`] - Intern pool backed by a [`HashSet`] using the
-//! Fowler-Noll-Vo hashing algorithm.
+//! - [`FxHashSetInterner`] - Intern pool backed by a [`HashSet`] using the
+//! [Fx Hash][fxhash] hashing algorithm.
//! - [`HashMapInterner`] - Intern pool backed by a [`HashMap`] to provide
//! associated metadata.
//! - [`DefaultMapInterner`] - The currently recommended intern pool
//! configuration for symbol interning with metadata.
-//! - [`FnvHashMapInterner`] - Intern pool backed by a [`HashMap`] using
-//! the Fowler-Noll-Vo hashing algorithm.
+//! - [`FxHashMapInterner`] - Intern pool backed by a [`HashMap`] using
+//! the [Fx Hash][fxhash] hashing algorithm.
//!
//! When basic interning is required,
//! use a set-based interner.
@@ -297,12 +297,42 @@
//! for Rust developed by Mozilla for Servo.
//! - [`string-interner`][rust-string-interner] is another string
//! interning library for Rust.
+//! - [Rustc interns strings as `Symbol`s][rustc-intern] using an
+//! [arena allocator][rustc-arena] and avoids `Rc` by representing
+//! symbols as integer values and converting them to strings using a
+//! global pool and unsafe rust to cast to a `static` slice.
+//! - This was the direction TAMER was initially going to take,
+//! but the simplicity of `Rc` was chosen over unsafe rust until
+//! such an optimization is needed and its benefits demonstrable.
+//! - Rustc identifies symbols by integer value encapsulated within a
+//! `Symbol`.
+//! TAMER does this at the level of the ASG instead,
+//! treating the interner as a separate system.
//!
//! [flyweight pattern]: https://en.wikipedia.org/wiki/Flyweight_pattern
//! [rust-string-cache]: https://github.com/servo/string-cache
//! [rust-string-interner]: https://github.com/robbepop/string-interner
-
-use ::fnv::FnvBuildHasher;
+//! [rustc-intern]: https://doc.rust-lang.org/nightly/nightly-rustc/syntax/ast/struct.Name.html
+//! [rustc-arena]: https://doc.rust-lang.org/nightly/nightly-rustc/arena/index.html
+//!
+//! The hash function chosen for this module is [Fx Hash][fxhash].
+//!
+//! - Rustc previously used the [Fowler-Noll-Vo (FNV)][fnv] hash
+//! function,
+//! but [now uses Fx Hash][rustc-fx].
+//! This was extracted into the [`fxhash`][fxhash] crate,
+//! which is used by TAMER.
+//! - TAMER originally used FNV,
+//! but benchmarks showed that Fx Hash was more performant.
+//! - Benchmarks for other hash functions,
+//! including FNV,
+//! can be found at the [`hash-rs`][hash-rs] project.
+//!
+//! [fnv]: https://doc.servo.org/fnv/
+//! [rustc-fx]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_data_structures/fx/index.html
+//! [hash-rs]: https://github.com/Gankra/hash-rs
+
+use fxhash::FxBuildHasher;
use std::collections::hash_map::{Entry, RandomState};
use std::collections::{HashMap, HashSet};
use std::hash::{BuildHasher, Hash};
@@ -617,32 +647,27 @@ where
}
}
-/// Interner using the [Fowler-Noll-Vo][fnv] (FNV) hashing function.
+/// Interner using the [Fx Hash][fxhash] hashing function.
///
/// _This is currently the hash function used by [`DefaultSetInterner`]._
///
-/// If the intern pool will contains strings primarily of length ≤ 60,
-/// and if denial of service is not a concern,
-/// then use of the FNV hash function will outperform the default
+/// If denial of service is not a concern,
+/// then this will outperform the default
/// [`DefaultHasher`](std::collections::hash_map::DefaultHasher)
/// (which uses SipHash at the time of writing).
-/// See intern benchmarks for a comparison.
-/// For a comparison of various hashing algorithms,
-/// see the [`hash-rs`][hash-rs] project.
///
-/// [hash-rs]: https://github.com/Gankra/hash-rs
-pub type FnvHashSetInterner<T> = HashSetInterner<T, FnvBuildHasher>;
+/// See intern benchmarks for a comparison.
+pub type FxHashSetInterner<T> = HashSetInterner<T, FxBuildHasher>;
/// Recommended configuration for set-based interners.
///
-/// The choice of this default relies on certain assumptions:
-/// - Denial-of-service attacks against the hash function are not a
-/// concern; and
-/// - Strings to be interned are mostly ≤ 60 characters.
+/// The choice of this default relies on the assumption that
+/// denial-of-service attacks against the hash function are not a
+/// concern.
///
/// For more information on the hashing algorithm,
-/// see [`FnvHashSetInterner`].
-pub type DefaultSetInterner = FnvHashSetInterner<SymbolRc>;
+/// see [`FxHashSetInterner`].
+pub type DefaultSetInterner = FxHashSetInterner<SymbolRc>;
/// An interner backed by a [`HashMap`].
///
@@ -749,32 +774,27 @@ where
}
}
-/// Map interner using the [Fowler-Noll-Vo][fnv] (FNV) hashing function.
+/// Interner using the [Fx Hash][fxhash] hashing function.
///
/// _This is currently the hash function used by [`DefaultMapInterner`]._
///
-/// If the intern pool will contains strings primarily of length ≤ 60,
-/// and if denial of service is not a concern,
-/// then use of the FNV hash function will outperform the default
+/// If denial of service is not a concern,
+/// then this will outperform the default
/// [`DefaultHasher`](std::collections::hash_map::DefaultHasher)
/// (which uses SipHash at the time of writing).
-/// See intern benchmarks for a comparison.
-/// For a comparison of various hashing algorithms,
-/// see the [`hash-rs`][hash-rs] project.
///
-/// [hash-rs]: https://github.com/Gankra/hash-rs
-pub type FnvHashMapInterner<T, M> = HashMapInterner<T, M, FnvBuildHasher>;
+/// See intern benchmarks for a comparison.
+pub type FxHashMapInterner<T, M> = HashMapInterner<T, M, FxBuildHasher>;
/// Recommended configuration for map-based interners.
///
-/// The choice of this default relies on certain assumptions:
-/// - Denial-of-service attacks against the hash function are not a
-/// concern; and
-/// - Strings to be interned are mostly ≤ 60 characters.
+/// The choice of this default relies on the assumption that
+/// denial-of-service attacks against the hash function are not a
+/// concern.
///
/// For more information on the hashing algorithm,
-/// see [`FnvHashMapInterner`].
-pub type DefaultMapInterner<M> = FnvHashMapInterner<SymbolRc, M>;
+/// see [`FxHashMapInterner`].
+pub type DefaultMapInterner<M> = FxHashMapInterner<SymbolRc, M>;
#[cfg(test)]
mod test {