Mike Gerwitz

Activist for User Freedom

aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMike Gerwitz <mike.gerwitz@ryansg.com>2019-12-23 23:26:42 -0500
committerMike Gerwitz <mike.gerwitz@ryansg.com>2020-02-24 14:56:28 -0500
commit1f4db84f248332a1a891e4d9b7ca28cf3afc547d (patch)
tree73dd8525689fda1b346a1ab05f4e642eed3a76f4 /tamer/benches
parent176d099fb61a49492515a0fd35fc19b4f2b70ce9 (diff)
downloadtame-1f4db84f248332a1a891e4d9b7ca28cf3afc547d.tar.gz
tame-1f4db84f248332a1a891e4d9b7ca28cf3afc547d.tar.bz2
tame-1f4db84f248332a1a891e4d9b7ca28cf3afc547d.zip
TAMER: Arena-based string interner
Contrary to what I said previously, this replaces the previous implementation with an arena-backed internment system. The motivation for this change was investigating how Rustc performed its string interning, and why they chose to associate integer identifiers with symbols. The intent was originally to use Rustc's arena allocator directly, but that create pulled in far too many dependencies and depended on nightly Rust. Bumpalo provides a very similar implementation to Rustc's DroplessArena, so I went with that instead. Rustc also relies on a global, singleton interner. I do not do that here. Instead, the returned Symbol carries a lifetime of the underlying arena, as well as a pointer to the interned string. Now that this is put to rest, it's time to move on.
Diffstat (limited to 'tamer/benches')
-rw-r--r--tamer/benches/sym.rs233
1 files changed, 6 insertions, 227 deletions
diff --git a/tamer/benches/sym.rs b/tamer/benches/sym.rs
index 9031f11..2e1c894 100644
--- a/tamer/benches/sym.rs
+++ b/tamer/benches/sym.rs
@@ -28,67 +28,13 @@ use std::rc::Rc;
use tamer::sym::*;
use test::Bencher;
-mod symbol {
- use super::*;
-
- /// This is our baseline. We should never be any slower than this.
- #[bench]
- fn new_raw_rc_1000_baseline(bench: &mut Bencher) {
- let s = "foo bar baz";
-
- bench.iter(|| {
- (0..1000)
- .map(|_| {
- let _: Rc<str> = s.into();
- })
- .for_each(drop);
- });
- }
-
- /// Using the `SymbolRc` wrapper should perform no differently than the
- /// above test with `Rc<str>`.
- #[bench]
- fn new_symbol_rc_1000(bench: &mut Bencher) {
- let s = "foo bar baz";
-
- bench.iter(|| {
- (0..1000).map(|_| SymbolRc::new(s)).for_each(drop);
- });
- }
-
- /// Rc uses pointer comparisons when possible. We want to match that.
- #[bench]
- fn eq_check_rc_baseline(bench: &mut Bencher) {
- bench.iter(|| {
- let a: Rc<str> = "foobarbazquux".into();
- let b: Rc<str> = "foobarbazquux".into();
- let c: Rc<str> = "foobarbazquuxx".into();
-
- let _ = a == b;
- let _ = a == c;
- });
- }
-
- #[bench]
- fn eq_check_symbol_rc_baseline(bench: &mut Bencher) {
- bench.iter(|| {
- let a: SymbolRc = "foobarbazquux".into();
- let b: SymbolRc = "foobarbazquux".into();
- let c: SymbolRc = "foobarbazquuxx".into();
-
- let _ = a == b;
- let _ = a == c;
- });
- }
-}
-
fn gen_strs(n: usize) -> Vec<String> {
(0..n)
.map(|n| n.to_string() + "foobarbazquuxlongsymbol")
.collect()
}
-mod hash_set {
+mod interner {
use super::*;
use std::collections::hash_map::RandomState;
use std::collections::HashSet;
@@ -137,7 +83,7 @@ mod hash_set {
let strs = gen_strs(1000);
bench.iter(|| {
- let mut sut = HashSetInterner::<SymbolRc>::new();
+ let sut = ArenaInterner::<RandomState>::new();
strs.iter().map(|s| sut.intern(&s)).for_each(drop);
});
}
@@ -154,7 +100,7 @@ mod hash_set {
#[bench]
fn with_one_new_1000(bench: &mut Bencher) {
bench.iter(|| {
- let mut sut = HashSetInterner::<SymbolRc>::new();
+ let sut = ArenaInterner::<RandomState>::new();
(0..1000).map(|_| sut.intern("first")).for_each(drop);
});
}
@@ -178,7 +124,7 @@ mod hash_set {
let strs = gen_strs(1000);
bench.iter(|| {
- let mut sut = HashSetInterner::<SymbolRc, FxBuildHasher>::new();
+ let sut = ArenaInterner::<FxBuildHasher>::new();
strs.iter().map(|s| sut.intern(&s)).for_each(drop);
});
}
@@ -197,7 +143,7 @@ mod hash_set {
#[bench]
fn with_one_new_1000(bench: &mut Bencher) {
bench.iter(|| {
- let mut sut = HashSetInterner::<SymbolRc, FxBuildHasher>::new();
+ let sut = ArenaInterner::<FxBuildHasher>::new();
(0..1000).map(|_| sut.intern("first")).for_each(drop);
});
}
@@ -210,176 +156,9 @@ mod hash_set {
let strs = gen_strs(n);
bench.iter(|| {
- let mut sut =
- HashSetInterner::<SymbolRc, FxBuildHasher>::with_capacity(
- n,
- );
+ let sut = ArenaInterner::<FxBuildHasher>::with_capacity(n);
strs.iter().map(|s| sut.intern(&s)).for_each(drop);
});
}
}
}
-
-mod hash_map {
- use super::*;
- use std::collections::hash_map::{Entry, RandomState};
- use std::collections::HashMap;
- use std::hash::BuildHasher;
-
- pub struct HashMapSut<M, S = RandomState>
- where
- S: BuildHasher,
- {
- pub map: HashMap<Rc<str>, M, S>,
- }
-
- impl<M, S> HashMapSut<M, S>
- where
- M: Default,
- S: BuildHasher + Default,
- {
- #[inline]
- fn new() -> Self {
- Self {
- map: HashMap::with_hasher(Default::default()),
- }
- }
-
- pub fn intern(&mut self, value: &str) -> Rc<str> {
- match self.map.entry(value.into()) {
- Entry::Vacant(v) => {
- let intern = v.key().clone();
-
- v.insert(Default::default());
- intern
- }
- Entry::Occupied(o) => o.key().clone(),
- }
- }
- }
-
- /// This is our baseline with a raw Rc<str>.
- #[bench]
- fn with_all_new_rc_str_1000_baseline(bench: &mut Bencher) {
- let strs = gen_strs(1000);
-
- bench.iter(|| {
- let mut sut = HashMapSut::<(), RandomState>::new();
- strs.iter().map(|s| sut.intern(&s)).for_each(drop);
- });
- }
-
- #[bench]
- fn with_all_new_1000(bench: &mut Bencher) {
- let strs = gen_strs(1000);
-
- bench.iter(|| {
- let mut sut = HashMapInterner::<SymbolRc, ()>::new();
- strs.iter().map(|s| sut.intern(&s)).for_each(drop);
- });
- }
-
- #[bench]
- /// This is our baseline with a raw Rc<str>.
- fn with_one_new_rc_str_1000_baseline(bench: &mut Bencher) {
- bench.iter(|| {
- let mut sut = HashMapSut::<(), RandomState>::new();
- (0..1000).map(|_| sut.intern("first")).for_each(drop);
- });
- }
-
- #[bench]
- fn with_one_new_1000(bench: &mut Bencher) {
- bench.iter(|| {
- let mut sut = HashMapInterner::<SymbolRc, ()>::new();
- (0..1000).map(|_| sut.intern("first")).for_each(drop);
- });
- }
-
- mod fx {
- use super::*;
- use fxhash::FxBuildHasher;
-
- /// This is our baseline with a raw Rc<str>.
- #[bench]
- fn with_all_new_rc_str_1000_baseline(bench: &mut Bencher) {
- let strs = gen_strs(1000);
- bench.iter(|| {
- let mut sut = HashMapSut::<(), FxBuildHasher>::new();
- strs.iter().map(|s| sut.intern(&s)).for_each(drop);
- });
- }
-
- #[bench]
- fn with_all_new_1000(bench: &mut Bencher) {
- let strs = gen_strs(1000);
-
- bench.iter(|| {
- let mut sut =
- HashMapInterner::<SymbolRc, (), FxBuildHasher>::new();
- strs.iter().map(|s| sut.intern(&s)).for_each(drop);
- });
- }
-
- #[bench]
- /// This is our baseline with a raw Rc<str>.
- fn with_one_new_rc_str_1000_baseline(bench: &mut Bencher) {
- bench.iter(|| {
- let mut sut: HashMapSut<(), FxBuildHasher> = HashMapSut {
- map: HashMap::with_hasher(Default::default()),
- };
- (0..1000).map(|_| sut.intern("first")).for_each(drop);
- });
- }
-
- #[bench]
- fn with_one_new_1000(bench: &mut Bencher) {
- bench.iter(|| {
- let mut sut =
- HashMapInterner::<SymbolRc, (), FxBuildHasher>::new();
- (0..1000).map(|_| sut.intern("first")).for_each(drop);
- });
- }
-
- /// Since FNV is the best-performing, let's build upon it to demonstrate
- /// the benefits of with_capacity
- #[bench]
- fn with_all_new_1000_with_capacity(bench: &mut Bencher) {
- let n = 1000;
- let strs = gen_strs(n);
-
- bench.iter(|| {
- let mut sut =
- HashMapInterner::<SymbolRc, (), FxBuildHasher>::with_capacity(n);
- strs.iter().map(|s| sut.intern(&s)).for_each(drop);
- });
- }
-
- #[bench]
- fn with_all_new_meta_1000(bench: &mut Bencher) {
- let strs = gen_strs(1000);
-
- bench.iter(|| {
- let mut sut =
- HashMapInterner::<SymbolRc, u8, FxBuildHasher>::new();
- strs.iter().map(|s| sut.intern_meta(&s, 0)).for_each(drop);
- });
- }
-
- #[bench]
- fn with_all_new_then_set_meta_1000(bench: &mut Bencher) {
- let strs = gen_strs(1000);
-
- bench.iter(|| {
- let mut sut =
- HashMapInterner::<SymbolRc, u8, FxBuildHasher>::new();
- strs.iter()
- .map(|s| {
- sut.intern(&s);
- sut.intern_meta(&s, 0);
- })
- .for_each(drop);
- });
- }
- }
-}