diff options
author | Mike Gerwitz <mike.gerwitz@ryansg.com> | 2019-12-23 23:26:42 -0500 |
---|---|---|
committer | Mike Gerwitz <mike.gerwitz@ryansg.com> | 2020-02-24 14:56:28 -0500 |
commit | 1f4db84f248332a1a891e4d9b7ca28cf3afc547d (patch) | |
tree | 73dd8525689fda1b346a1ab05f4e642eed3a76f4 /tamer/benches | |
parent | 176d099fb61a49492515a0fd35fc19b4f2b70ce9 (diff) | |
download | tame-1f4db84f248332a1a891e4d9b7ca28cf3afc547d.tar.gz tame-1f4db84f248332a1a891e4d9b7ca28cf3afc547d.tar.bz2 tame-1f4db84f248332a1a891e4d9b7ca28cf3afc547d.zip |
TAMER: Arena-based string interner
Contrary to what I said previously, this replaces the previous
implementation with an arena-backed internment system. The motivation for
this change was investigating how Rustc performed its string interning, and
why they chose to associate integer identifiers with symbols.
The intent was originally to use Rustc's arena allocator directly, but that
create pulled in far too many dependencies and depended on nightly
Rust. Bumpalo provides a very similar implementation to Rustc's
DroplessArena, so I went with that instead.
Rustc also relies on a global, singleton interner. I do not do that
here. Instead, the returned Symbol carries a lifetime of the underlying
arena, as well as a pointer to the interned string.
Now that this is put to rest, it's time to move on.
Diffstat (limited to 'tamer/benches')
-rw-r--r-- | tamer/benches/sym.rs | 233 |
1 files changed, 6 insertions, 227 deletions
diff --git a/tamer/benches/sym.rs b/tamer/benches/sym.rs index 9031f11..2e1c894 100644 --- a/tamer/benches/sym.rs +++ b/tamer/benches/sym.rs @@ -28,67 +28,13 @@ use std::rc::Rc; use tamer::sym::*; use test::Bencher; -mod symbol { - use super::*; - - /// This is our baseline. We should never be any slower than this. - #[bench] - fn new_raw_rc_1000_baseline(bench: &mut Bencher) { - let s = "foo bar baz"; - - bench.iter(|| { - (0..1000) - .map(|_| { - let _: Rc<str> = s.into(); - }) - .for_each(drop); - }); - } - - /// Using the `SymbolRc` wrapper should perform no differently than the - /// above test with `Rc<str>`. - #[bench] - fn new_symbol_rc_1000(bench: &mut Bencher) { - let s = "foo bar baz"; - - bench.iter(|| { - (0..1000).map(|_| SymbolRc::new(s)).for_each(drop); - }); - } - - /// Rc uses pointer comparisons when possible. We want to match that. - #[bench] - fn eq_check_rc_baseline(bench: &mut Bencher) { - bench.iter(|| { - let a: Rc<str> = "foobarbazquux".into(); - let b: Rc<str> = "foobarbazquux".into(); - let c: Rc<str> = "foobarbazquuxx".into(); - - let _ = a == b; - let _ = a == c; - }); - } - - #[bench] - fn eq_check_symbol_rc_baseline(bench: &mut Bencher) { - bench.iter(|| { - let a: SymbolRc = "foobarbazquux".into(); - let b: SymbolRc = "foobarbazquux".into(); - let c: SymbolRc = "foobarbazquuxx".into(); - - let _ = a == b; - let _ = a == c; - }); - } -} - fn gen_strs(n: usize) -> Vec<String> { (0..n) .map(|n| n.to_string() + "foobarbazquuxlongsymbol") .collect() } -mod hash_set { +mod interner { use super::*; use std::collections::hash_map::RandomState; use std::collections::HashSet; @@ -137,7 +83,7 @@ mod hash_set { let strs = gen_strs(1000); bench.iter(|| { - let mut sut = HashSetInterner::<SymbolRc>::new(); + let sut = ArenaInterner::<RandomState>::new(); strs.iter().map(|s| sut.intern(&s)).for_each(drop); }); } @@ -154,7 +100,7 @@ mod hash_set { #[bench] fn with_one_new_1000(bench: &mut Bencher) { bench.iter(|| { - let mut sut = HashSetInterner::<SymbolRc>::new(); + let sut = ArenaInterner::<RandomState>::new(); (0..1000).map(|_| sut.intern("first")).for_each(drop); }); } @@ -178,7 +124,7 @@ mod hash_set { let strs = gen_strs(1000); bench.iter(|| { - let mut sut = HashSetInterner::<SymbolRc, FxBuildHasher>::new(); + let sut = ArenaInterner::<FxBuildHasher>::new(); strs.iter().map(|s| sut.intern(&s)).for_each(drop); }); } @@ -197,7 +143,7 @@ mod hash_set { #[bench] fn with_one_new_1000(bench: &mut Bencher) { bench.iter(|| { - let mut sut = HashSetInterner::<SymbolRc, FxBuildHasher>::new(); + let sut = ArenaInterner::<FxBuildHasher>::new(); (0..1000).map(|_| sut.intern("first")).for_each(drop); }); } @@ -210,176 +156,9 @@ mod hash_set { let strs = gen_strs(n); bench.iter(|| { - let mut sut = - HashSetInterner::<SymbolRc, FxBuildHasher>::with_capacity( - n, - ); + let sut = ArenaInterner::<FxBuildHasher>::with_capacity(n); strs.iter().map(|s| sut.intern(&s)).for_each(drop); }); } } } - -mod hash_map { - use super::*; - use std::collections::hash_map::{Entry, RandomState}; - use std::collections::HashMap; - use std::hash::BuildHasher; - - pub struct HashMapSut<M, S = RandomState> - where - S: BuildHasher, - { - pub map: HashMap<Rc<str>, M, S>, - } - - impl<M, S> HashMapSut<M, S> - where - M: Default, - S: BuildHasher + Default, - { - #[inline] - fn new() -> Self { - Self { - map: HashMap::with_hasher(Default::default()), - } - } - - pub fn intern(&mut self, value: &str) -> Rc<str> { - match self.map.entry(value.into()) { - Entry::Vacant(v) => { - let intern = v.key().clone(); - - v.insert(Default::default()); - intern - } - Entry::Occupied(o) => o.key().clone(), - } - } - } - - /// This is our baseline with a raw Rc<str>. - #[bench] - fn with_all_new_rc_str_1000_baseline(bench: &mut Bencher) { - let strs = gen_strs(1000); - - bench.iter(|| { - let mut sut = HashMapSut::<(), RandomState>::new(); - strs.iter().map(|s| sut.intern(&s)).for_each(drop); - }); - } - - #[bench] - fn with_all_new_1000(bench: &mut Bencher) { - let strs = gen_strs(1000); - - bench.iter(|| { - let mut sut = HashMapInterner::<SymbolRc, ()>::new(); - strs.iter().map(|s| sut.intern(&s)).for_each(drop); - }); - } - - #[bench] - /// This is our baseline with a raw Rc<str>. - fn with_one_new_rc_str_1000_baseline(bench: &mut Bencher) { - bench.iter(|| { - let mut sut = HashMapSut::<(), RandomState>::new(); - (0..1000).map(|_| sut.intern("first")).for_each(drop); - }); - } - - #[bench] - fn with_one_new_1000(bench: &mut Bencher) { - bench.iter(|| { - let mut sut = HashMapInterner::<SymbolRc, ()>::new(); - (0..1000).map(|_| sut.intern("first")).for_each(drop); - }); - } - - mod fx { - use super::*; - use fxhash::FxBuildHasher; - - /// This is our baseline with a raw Rc<str>. - #[bench] - fn with_all_new_rc_str_1000_baseline(bench: &mut Bencher) { - let strs = gen_strs(1000); - bench.iter(|| { - let mut sut = HashMapSut::<(), FxBuildHasher>::new(); - strs.iter().map(|s| sut.intern(&s)).for_each(drop); - }); - } - - #[bench] - fn with_all_new_1000(bench: &mut Bencher) { - let strs = gen_strs(1000); - - bench.iter(|| { - let mut sut = - HashMapInterner::<SymbolRc, (), FxBuildHasher>::new(); - strs.iter().map(|s| sut.intern(&s)).for_each(drop); - }); - } - - #[bench] - /// This is our baseline with a raw Rc<str>. - fn with_one_new_rc_str_1000_baseline(bench: &mut Bencher) { - bench.iter(|| { - let mut sut: HashMapSut<(), FxBuildHasher> = HashMapSut { - map: HashMap::with_hasher(Default::default()), - }; - (0..1000).map(|_| sut.intern("first")).for_each(drop); - }); - } - - #[bench] - fn with_one_new_1000(bench: &mut Bencher) { - bench.iter(|| { - let mut sut = - HashMapInterner::<SymbolRc, (), FxBuildHasher>::new(); - (0..1000).map(|_| sut.intern("first")).for_each(drop); - }); - } - - /// Since FNV is the best-performing, let's build upon it to demonstrate - /// the benefits of with_capacity - #[bench] - fn with_all_new_1000_with_capacity(bench: &mut Bencher) { - let n = 1000; - let strs = gen_strs(n); - - bench.iter(|| { - let mut sut = - HashMapInterner::<SymbolRc, (), FxBuildHasher>::with_capacity(n); - strs.iter().map(|s| sut.intern(&s)).for_each(drop); - }); - } - - #[bench] - fn with_all_new_meta_1000(bench: &mut Bencher) { - let strs = gen_strs(1000); - - bench.iter(|| { - let mut sut = - HashMapInterner::<SymbolRc, u8, FxBuildHasher>::new(); - strs.iter().map(|s| sut.intern_meta(&s, 0)).for_each(drop); - }); - } - - #[bench] - fn with_all_new_then_set_meta_1000(bench: &mut Bencher) { - let strs = gen_strs(1000); - - bench.iter(|| { - let mut sut = - HashMapInterner::<SymbolRc, u8, FxBuildHasher>::new(); - strs.iter() - .map(|s| { - sut.intern(&s); - sut.intern_meta(&s, 0); - }) - .for_each(drop); - }); - } - } -} |