diff options
-rw-r--r-- | tamer/Cargo.lock | 17 | ||||
-rw-r--r-- | tamer/Cargo.toml | 2 | ||||
-rw-r--r-- | tamer/benches/sym.rs | 36 | ||||
-rw-r--r-- | tamer/src/sym.rs | 92 |
4 files changed, 87 insertions, 60 deletions
diff --git a/tamer/Cargo.lock b/tamer/Cargo.lock index 9340d78..d82d96f 100644 --- a/tamer/Cargo.lock +++ b/tamer/Cargo.lock @@ -1,14 +1,22 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. [[package]] +name = "byteorder" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] name = "fixedbitset" version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] -name = "fnv" -version = "1.0.6" +name = "fxhash" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)", +] [[package]] name = "memchr" @@ -42,14 +50,15 @@ name = "tamer" version = "0.0.0" dependencies = [ "fixedbitset 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)", - "fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", + "fxhash 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "petgraph 0.4.13 (registry+https://github.com/rust-lang/crates.io-index)", "quick-xml 0.17.0 (registry+https://github.com/rust-lang/crates.io-index)", ] [metadata] +"checksum byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5" "checksum fixedbitset 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "86d4de0081402f5e88cdac65c8dcdcc73118c1a7a465e2a05f0da05843a8ea33" -"checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3" +"checksum fxhash 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" "checksum memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "88579771288728879b57485cc7d6b07d648c9f0141eb955f8ab7f9d45394468e" "checksum ordermap 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "a86ed3f5f244b372d6b1a00b72ef7f8876d0bc6a78a4c9985c53614041512063" "checksum petgraph 0.4.13 (registry+https://github.com/rust-lang/crates.io-index)" = "9c3659d1ee90221741f65dd128d9998311b0e40c5d3c23a62445938214abce4f" diff --git a/tamer/Cargo.toml b/tamer/Cargo.toml index 82cfaf5..a758e5d 100644 --- a/tamer/Cargo.toml +++ b/tamer/Cargo.toml @@ -23,7 +23,7 @@ lto = true lto = true [dependencies] -fnv = ">= 1.0.3" +fxhash = ">= 0.2.1" petgraph = ">= 0.4.13" quick-xml = ">= 0.17.0" # used by petgraph diff --git a/tamer/benches/sym.rs b/tamer/benches/sym.rs index f601bbe..9031f11 100644 --- a/tamer/benches/sym.rs +++ b/tamer/benches/sym.rs @@ -159,16 +159,16 @@ mod hash_set { }); } - mod fnv { + mod fx { use super::*; - use ::fnv::FnvBuildHasher; + use fxhash::FxBuildHasher; /// This is our baseline with a raw Rc<str>. #[bench] fn with_all_new_rc_str_1000_baseline(bench: &mut Bencher) { let strs = gen_strs(1000); bench.iter(|| { - let mut sut = HashSetSut::<FnvBuildHasher>::new(); + let mut sut = HashSetSut::<FxBuildHasher>::new(); strs.iter().map(|s| sut.intern(&s)).for_each(drop); }); } @@ -178,8 +178,7 @@ mod hash_set { let strs = gen_strs(1000); bench.iter(|| { - let mut sut = - HashSetInterner::<SymbolRc, FnvBuildHasher>::new(); + let mut sut = HashSetInterner::<SymbolRc, FxBuildHasher>::new(); strs.iter().map(|s| sut.intern(&s)).for_each(drop); }); } @@ -188,7 +187,7 @@ mod hash_set { /// This is our baseline with a raw Rc<str>. fn with_one_new_rc_str_1000_baseline(bench: &mut Bencher) { bench.iter(|| { - let mut sut: HashSetSut<FnvBuildHasher> = HashSetSut { + let mut sut: HashSetSut<FxBuildHasher> = HashSetSut { map: HashSet::with_hasher(Default::default()), }; (0..1000).map(|_| sut.intern("first")).for_each(drop); @@ -198,13 +197,12 @@ mod hash_set { #[bench] fn with_one_new_1000(bench: &mut Bencher) { bench.iter(|| { - let mut sut = - HashSetInterner::<SymbolRc, FnvBuildHasher>::new(); + let mut sut = HashSetInterner::<SymbolRc, FxBuildHasher>::new(); (0..1000).map(|_| sut.intern("first")).for_each(drop); }); } - /// Since FNV is the best-performing, let's build upon it to demonstrate + /// Since Fx is the best-performing, let's build upon it to demonstrate /// the benefits of with_capacity #[bench] fn with_all_new_1000_with_capacity(bench: &mut Bencher) { @@ -213,7 +211,7 @@ mod hash_set { bench.iter(|| { let mut sut = - HashSetInterner::<SymbolRc, FnvBuildHasher>::with_capacity( + HashSetInterner::<SymbolRc, FxBuildHasher>::with_capacity( n, ); strs.iter().map(|s| sut.intern(&s)).for_each(drop); @@ -298,16 +296,16 @@ mod hash_map { }); } - mod fnv { + mod fx { use super::*; - use ::fnv::FnvBuildHasher; + use fxhash::FxBuildHasher; /// This is our baseline with a raw Rc<str>. #[bench] fn with_all_new_rc_str_1000_baseline(bench: &mut Bencher) { let strs = gen_strs(1000); bench.iter(|| { - let mut sut = HashMapSut::<(), FnvBuildHasher>::new(); + let mut sut = HashMapSut::<(), FxBuildHasher>::new(); strs.iter().map(|s| sut.intern(&s)).for_each(drop); }); } @@ -318,7 +316,7 @@ mod hash_map { bench.iter(|| { let mut sut = - HashMapInterner::<SymbolRc, (), FnvBuildHasher>::new(); + HashMapInterner::<SymbolRc, (), FxBuildHasher>::new(); strs.iter().map(|s| sut.intern(&s)).for_each(drop); }); } @@ -327,7 +325,7 @@ mod hash_map { /// This is our baseline with a raw Rc<str>. fn with_one_new_rc_str_1000_baseline(bench: &mut Bencher) { bench.iter(|| { - let mut sut: HashMapSut<(), FnvBuildHasher> = HashMapSut { + let mut sut: HashMapSut<(), FxBuildHasher> = HashMapSut { map: HashMap::with_hasher(Default::default()), }; (0..1000).map(|_| sut.intern("first")).for_each(drop); @@ -338,7 +336,7 @@ mod hash_map { fn with_one_new_1000(bench: &mut Bencher) { bench.iter(|| { let mut sut = - HashMapInterner::<SymbolRc, (), FnvBuildHasher>::new(); + HashMapInterner::<SymbolRc, (), FxBuildHasher>::new(); (0..1000).map(|_| sut.intern("first")).for_each(drop); }); } @@ -352,7 +350,7 @@ mod hash_map { bench.iter(|| { let mut sut = - HashMapInterner::<SymbolRc, (), FnvBuildHasher>::with_capacity(n); + HashMapInterner::<SymbolRc, (), FxBuildHasher>::with_capacity(n); strs.iter().map(|s| sut.intern(&s)).for_each(drop); }); } @@ -363,7 +361,7 @@ mod hash_map { bench.iter(|| { let mut sut = - HashMapInterner::<SymbolRc, u8, FnvBuildHasher>::new(); + HashMapInterner::<SymbolRc, u8, FxBuildHasher>::new(); strs.iter().map(|s| sut.intern_meta(&s, 0)).for_each(drop); }); } @@ -374,7 +372,7 @@ mod hash_map { bench.iter(|| { let mut sut = - HashMapInterner::<SymbolRc, u8, FnvBuildHasher>::new(); + HashMapInterner::<SymbolRc, u8, FxBuildHasher>::new(); strs.iter() .map(|s| { sut.intern(&s); diff --git a/tamer/src/sym.rs b/tamer/src/sym.rs index 47d0e08..6218811 100644 --- a/tamer/src/sym.rs +++ b/tamer/src/sym.rs @@ -30,14 +30,14 @@ //! - [`HashSetInterner`] - Intern pool backed by a [`HashSet`]. //! - [`DefaultSetInterner`] - The currently recommended intern pool //! configuration for symbol interning without metadata. -//! - [`FnvHashSetInterner`] - Intern pool backed by a [`HashSet`] using the -//! Fowler-Noll-Vo hashing algorithm. +//! - [`FxHashSetInterner`] - Intern pool backed by a [`HashSet`] using the +//! [Fx Hash][fxhash] hashing algorithm. //! - [`HashMapInterner`] - Intern pool backed by a [`HashMap`] to provide //! associated metadata. //! - [`DefaultMapInterner`] - The currently recommended intern pool //! configuration for symbol interning with metadata. -//! - [`FnvHashMapInterner`] - Intern pool backed by a [`HashMap`] using -//! the Fowler-Noll-Vo hashing algorithm. +//! - [`FxHashMapInterner`] - Intern pool backed by a [`HashMap`] using +//! the [Fx Hash][fxhash] hashing algorithm. //! //! When basic interning is required, //! use a set-based interner. @@ -297,12 +297,42 @@ //! for Rust developed by Mozilla for Servo. //! - [`string-interner`][rust-string-interner] is another string //! interning library for Rust. +//! - [Rustc interns strings as `Symbol`s][rustc-intern] using an +//! [arena allocator][rustc-arena] and avoids `Rc` by representing +//! symbols as integer values and converting them to strings using a +//! global pool and unsafe rust to cast to a `static` slice. +//! - This was the direction TAMER was initially going to take, +//! but the simplicity of `Rc` was chosen over unsafe rust until +//! such an optimization is needed and its benefits demonstrable. +//! - Rustc identifies symbols by integer value encapsulated within a +//! `Symbol`. +//! TAMER does this at the level of the ASG instead, +//! treating the interner as a separate system. //! //! [flyweight pattern]: https://en.wikipedia.org/wiki/Flyweight_pattern //! [rust-string-cache]: https://github.com/servo/string-cache //! [rust-string-interner]: https://github.com/robbepop/string-interner - -use ::fnv::FnvBuildHasher; +//! [rustc-intern]: https://doc.rust-lang.org/nightly/nightly-rustc/syntax/ast/struct.Name.html +//! [rustc-arena]: https://doc.rust-lang.org/nightly/nightly-rustc/arena/index.html +//! +//! The hash function chosen for this module is [Fx Hash][fxhash]. +//! +//! - Rustc previously used the [Fowler-Noll-Vo (FNV)][fnv] hash +//! function, +//! but [now uses Fx Hash][rustc-fx]. +//! This was extracted into the [`fxhash`][fxhash] crate, +//! which is used by TAMER. +//! - TAMER originally used FNV, +//! but benchmarks showed that Fx Hash was more performant. +//! - Benchmarks for other hash functions, +//! including FNV, +//! can be found at the [`hash-rs`][hash-rs] project. +//! +//! [fnv]: https://doc.servo.org/fnv/ +//! [rustc-fx]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_data_structures/fx/index.html +//! [hash-rs]: https://github.com/Gankra/hash-rs + +use fxhash::FxBuildHasher; use std::collections::hash_map::{Entry, RandomState}; use std::collections::{HashMap, HashSet}; use std::hash::{BuildHasher, Hash}; @@ -617,32 +647,27 @@ where } } -/// Interner using the [Fowler-Noll-Vo][fnv] (FNV) hashing function. +/// Interner using the [Fx Hash][fxhash] hashing function. /// /// _This is currently the hash function used by [`DefaultSetInterner`]._ /// -/// If the intern pool will contains strings primarily of length ≤ 60, -/// and if denial of service is not a concern, -/// then use of the FNV hash function will outperform the default +/// If denial of service is not a concern, +/// then this will outperform the default /// [`DefaultHasher`](std::collections::hash_map::DefaultHasher) /// (which uses SipHash at the time of writing). -/// See intern benchmarks for a comparison. -/// For a comparison of various hashing algorithms, -/// see the [`hash-rs`][hash-rs] project. /// -/// [hash-rs]: https://github.com/Gankra/hash-rs -pub type FnvHashSetInterner<T> = HashSetInterner<T, FnvBuildHasher>; +/// See intern benchmarks for a comparison. +pub type FxHashSetInterner<T> = HashSetInterner<T, FxBuildHasher>; /// Recommended configuration for set-based interners. /// -/// The choice of this default relies on certain assumptions: -/// - Denial-of-service attacks against the hash function are not a -/// concern; and -/// - Strings to be interned are mostly ≤ 60 characters. +/// The choice of this default relies on the assumption that +/// denial-of-service attacks against the hash function are not a +/// concern. /// /// For more information on the hashing algorithm, -/// see [`FnvHashSetInterner`]. -pub type DefaultSetInterner = FnvHashSetInterner<SymbolRc>; +/// see [`FxHashSetInterner`]. +pub type DefaultSetInterner = FxHashSetInterner<SymbolRc>; /// An interner backed by a [`HashMap`]. /// @@ -749,32 +774,27 @@ where } } -/// Map interner using the [Fowler-Noll-Vo][fnv] (FNV) hashing function. +/// Interner using the [Fx Hash][fxhash] hashing function. /// /// _This is currently the hash function used by [`DefaultMapInterner`]._ /// -/// If the intern pool will contains strings primarily of length ≤ 60, -/// and if denial of service is not a concern, -/// then use of the FNV hash function will outperform the default +/// If denial of service is not a concern, +/// then this will outperform the default /// [`DefaultHasher`](std::collections::hash_map::DefaultHasher) /// (which uses SipHash at the time of writing). -/// See intern benchmarks for a comparison. -/// For a comparison of various hashing algorithms, -/// see the [`hash-rs`][hash-rs] project. /// -/// [hash-rs]: https://github.com/Gankra/hash-rs -pub type FnvHashMapInterner<T, M> = HashMapInterner<T, M, FnvBuildHasher>; +/// See intern benchmarks for a comparison. +pub type FxHashMapInterner<T, M> = HashMapInterner<T, M, FxBuildHasher>; /// Recommended configuration for map-based interners. /// -/// The choice of this default relies on certain assumptions: -/// - Denial-of-service attacks against the hash function are not a -/// concern; and -/// - Strings to be interned are mostly ≤ 60 characters. +/// The choice of this default relies on the assumption that +/// denial-of-service attacks against the hash function are not a +/// concern. /// /// For more information on the hashing algorithm, -/// see [`FnvHashMapInterner`]. -pub type DefaultMapInterner<M> = FnvHashMapInterner<SymbolRc, M>; +/// see [`FxHashMapInterner`]. +pub type DefaultMapInterner<M> = FxHashMapInterner<SymbolRc, M>; #[cfg(test)] mod test { |