Mike Gerwitz

Activist for User Freedom

aboutsummaryrefslogtreecommitdiffstats
path: root/tamer
diff options
context:
space:
mode:
authorMike Gerwitz <mike.gerwitz@ryansg.com>2020-03-03 11:29:37 -0500
committerMike Gerwitz <mike.gerwitz@ryansg.com>2020-03-03 11:32:49 -0500
commit777494a6026e6eabff9ca9f05f00aa92e2f58c3d (patch)
tree1a71897b93da29feb2113896d60ce84b8e485796 /tamer
parente1076ce3880f5533cad311543f551a50be58c53e (diff)
parent6ac764108768e85db597c478ef7a754be416076b (diff)
downloadtame-777494a6026e6eabff9ca9f05f00aa92e2f58c3d.tar.gz
tame-777494a6026e6eabff9ca9f05f00aa92e2f58c3d.tar.bz2
tame-777494a6026e6eabff9ca9f05f00aa92e2f58c3d.zip
TAMER linker (still partly proof-of-concept)
We will continue to finalize this as we go. It is currently used in production, both for performance and because it fixes a bug in the XSLT-based linker.
Diffstat (limited to 'tamer')
-rw-r--r--tamer/Cargo.lock23
-rw-r--r--tamer/Cargo.toml11
-rw-r--r--tamer/Makefile.am19
-rw-r--r--tamer/README.md48
-rw-r--r--tamer/benches/sym.rs174
-rw-r--r--tamer/build-aux/bench_check.rs23
-rw-r--r--tamer/build-aux/intra_rustdoc_links_check.rs23
-rw-r--r--tamer/configure.ac43
-rw-r--r--tamer/src/bin/tameld.rs15
-rw-r--r--tamer/src/global.rs81
-rw-r--r--tamer/src/ir/asg/base.rs652
-rw-r--r--tamer/src/ir/asg/graph.rs249
-rw-r--r--tamer/src/ir/asg/ident.rs376
-rw-r--r--tamer/src/ir/asg/mod.rs192
-rw-r--r--tamer/src/ir/asg/object.rs230
-rw-r--r--tamer/src/ir/legacyir.rs347
-rw-r--r--tamer/src/ir/mod.rs62
-rw-r--r--tamer/src/ld.rs92
-rw-r--r--tamer/src/ld/poc.rs593
-rw-r--r--tamer/src/lib.rs9
-rw-r--r--tamer/src/obj/mod.rs35
-rw-r--r--tamer/src/obj/xmle/mod.rs69
-rw-r--r--tamer/src/obj/xmle/writer/mod.rs47
-rw-r--r--tamer/src/obj/xmle/writer/writer.rs320
-rw-r--r--tamer/src/obj/xmle/writer/xmle.rs749
-rw-r--r--tamer/src/obj/xmlo/mod.rs75
-rw-r--r--tamer/src/obj/xmlo/reader.rs1787
-rw-r--r--tamer/src/sym.rs826
-rw-r--r--tamer/src/test/mod.rs18
-rw-r--r--tamer/src/test/quick_xml/mod.rs125
30 files changed, 6947 insertions, 366 deletions
diff --git a/tamer/Cargo.lock b/tamer/Cargo.lock
index 8db9877..44b4f19 100644
--- a/tamer/Cargo.lock
+++ b/tamer/Cargo.lock
@@ -1,11 +1,29 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
[[package]]
+name = "bumpalo"
+version = "2.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "byteorder"
+version = "1.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
name = "fixedbitset"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
+name = "fxhash"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
name = "memchr"
version = "2.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -36,13 +54,18 @@ dependencies = [
name = "tamer"
version = "0.0.0"
dependencies = [
+ "bumpalo 2.6.0 (registry+https://github.com/rust-lang/crates.io-index)",
"fixedbitset 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
+ "fxhash 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"petgraph 0.4.13 (registry+https://github.com/rust-lang/crates.io-index)",
"quick-xml 0.17.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[metadata]
+"checksum bumpalo 2.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ad807f2fc2bf185eeb98ff3a901bd46dc5ad58163d0fa4577ba0d25674d71708"
+"checksum byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5"
"checksum fixedbitset 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "86d4de0081402f5e88cdac65c8dcdcc73118c1a7a465e2a05f0da05843a8ea33"
+"checksum fxhash 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
"checksum memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "88579771288728879b57485cc7d6b07d648c9f0141eb955f8ab7f9d45394468e"
"checksum ordermap 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "a86ed3f5f244b372d6b1a00b72ef7f8876d0bc6a78a4c9985c53614041512063"
"checksum petgraph 0.4.13 (registry+https://github.com/rust-lang/crates.io-index)" = "9c3659d1ee90221741f65dd128d9998311b0e40c5d3c23a62445938214abce4f"
diff --git a/tamer/Cargo.toml b/tamer/Cargo.toml
index 9880167..64519f9 100644
--- a/tamer/Cargo.toml
+++ b/tamer/Cargo.toml
@@ -17,8 +17,15 @@ opt-level = 3
[profile.release]
lto = true
+[profile.bench]
+# We want our benchmarks to be representative of how well TAME will perform
+# in a release.
+lto = true
+
[dependencies]
-quick-xml = ">= 0.17.0"
-petgraph = ">= 0.4.13"
+bumpalo = ">= 2.6.0"
# used by petgraph
fixedbitset = ">= 0.1"
+fxhash = ">= 0.2.1"
+petgraph = ">= 0.4.13"
+quick-xml = ">= 0.17.0"
diff --git a/tamer/Makefile.am b/tamer/Makefile.am
index 2cc502c..3f84621 100644
--- a/tamer/Makefile.am
+++ b/tamer/Makefile.am
@@ -18,7 +18,7 @@
.DELETE_ON_ERROR:
-.PHONY: all
+.PHONY: all fix fmt check-fmt bench
CARGO_BUILD_FLAGS=@CARGO_BUILD_FLAGS@
@@ -27,10 +27,23 @@ all:
doc: html
html-am:
- @CARGO@ doc
+ @CARGO@ test --doc
+ @CARGO@ @CARGO_DOC_FLAGS@ doc --document-private-items
# note that 'cargo check' is something else; see 'cargo --help'
test: check
-check-am:
+check-am: check-fmt
@CARGO@ test
+check-fmt:
+ @CARGO@ fmt -- --check
+
+bench:
+ @CARGO@ @CARGO_BENCH_FLAGS@ bench
+
+fix: fmt
+fmt:
+ @CARGO@ fmt
+
+clean-am:
+ @CARGO@ clean
diff --git a/tamer/README.md b/tamer/README.md
index 6d44f02..4f602f0 100644
--- a/tamer/README.md
+++ b/tamer/README.md
@@ -47,3 +47,51 @@ $ ./configure CARGO_BUILD_FLAGS=--release && make
$ ./configure && make
$ ./configure CARGO_BUILD_FLAGS=--release && make CARGO_BUILD_FLAGS=
```
+
+
+## Hacking
+This section contains advice for those developing TAMER.
+
+
+### Running Tests
+Developers should be using test-driven development (TDD). `make check` will
+run all necessary tests.
+
+
+### Code Format
+Rust provides `rustfmt` that can automatically format code for you. This
+project mandates its use and therefore eliminates personal preference in
+code style (for better or worse).
+
+Formatting checks are run during `make check` and, on failure, will output
+the diff that would be applied if you ran `make fmt` (or `make fix`); this
+will run `cargo fmt` for you (and will use the binaries configured via
+`configure`).
+
+Since developers should be doing test-driven development (TDD) and therefore
+should be running `make check` frequently, the hope is that frequent
+feedback on formatting issues will allow developers to quickly adjust their
+habits to avoid triggering formatting errors at all.
+
+If you want to automatically fix formatting errors and then run tests:
+
+```sh
+$ make fmt check
+```
+
+
+## Benchmarking
+Benchmarks serve two purposes: external integration tests (which are subject
+to module visibility constraints) and actual benchmarking. To run
+benchmarks, invoke `make bench`.
+
+Note that link-time optimizations (LTO) are performed on the binary for
+benchmarking so that its performance reflects release builds that will be
+used in production.
+
+The `configure` script will automatically detect whether the `test` feature
+is unstable (as it was as of the time of writing) and, if so, will
+automatically fall back to invoking nightly (by running `cargo +nightly
+bench`).
+
+If you do not have nightly, run you install it via `rustup install nightly`.
diff --git a/tamer/benches/sym.rs b/tamer/benches/sym.rs
new file mode 100644
index 0000000..d96b7d2
--- /dev/null
+++ b/tamer/benches/sym.rs
@@ -0,0 +1,174 @@
+// String internment benchmarks and baselines
+//
+// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+// Note that the baseline tests have a _suffix_ rather than a prefix so that
+// they are still grouped with the associated test in the output, since it's
+// sorted lexically by function name.
+
+#![feature(test)]
+
+extern crate tamer;
+extern crate test;
+
+use std::rc::Rc;
+use tamer::sym::*;
+use test::Bencher;
+
+fn gen_strs(n: usize) -> Vec<String> {
+ (0..n)
+ .map(|n| n.to_string() + "foobarbazquuxlongsymbol")
+ .collect()
+}
+
+mod interner {
+ use super::*;
+ use std::collections::hash_map::RandomState;
+ use std::collections::HashSet;
+ use std::hash::BuildHasher;
+
+ pub struct HashSetSut<S = RandomState>
+ where
+ S: BuildHasher,
+ {
+ pub map: HashSet<Rc<str>, S>,
+ }
+
+ impl<S> HashSetSut<S>
+ where
+ S: BuildHasher + Default,
+ {
+ #[inline]
+ fn new() -> Self {
+ Self {
+ map: HashSet::with_hasher(Default::default()),
+ }
+ }
+
+ pub fn intern(&mut self, value: &str) -> Rc<str> {
+ if !self.map.contains(value) {
+ self.map.insert(value.into());
+ }
+
+ self.map.get(value).unwrap().clone()
+ }
+ }
+
+ /// This is our baseline with a raw Rc<str>.
+ #[bench]
+ fn with_all_new_rc_str_1000_baseline(bench: &mut Bencher) {
+ let strs = gen_strs(1000);
+
+ bench.iter(|| {
+ let mut sut = HashSetSut::<RandomState>::new();
+ strs.iter().map(|s| sut.intern(&s)).for_each(drop);
+ });
+ }
+
+ #[bench]
+ fn with_all_new_1000(bench: &mut Bencher) {
+ let strs = gen_strs(1000);
+
+ bench.iter(|| {
+ let sut = ArenaInterner::<RandomState>::new();
+ strs.iter().map(|s| sut.intern(&s)).for_each(drop);
+ });
+ }
+
+ #[bench]
+ /// This is our baseline with a raw Rc<str>.
+ fn with_one_new_rc_str_1000_baseline(bench: &mut Bencher) {
+ bench.iter(|| {
+ let mut sut = HashSetSut::<RandomState>::new();
+ (0..1000).map(|_| sut.intern("first")).for_each(drop);
+ });
+ }
+
+ #[bench]
+ fn with_one_new_1000(bench: &mut Bencher) {
+ bench.iter(|| {
+ let sut = ArenaInterner::<RandomState>::new();
+ (0..1000).map(|_| sut.intern("first")).for_each(drop);
+ });
+ }
+
+ mod fx {
+ use super::*;
+ use fxhash::FxBuildHasher;
+
+ /// This is our baseline with a raw Rc<str>.
+ #[bench]
+ fn with_all_new_rc_str_1000_baseline(bench: &mut Bencher) {
+ let strs = gen_strs(1000);
+ bench.iter(|| {
+ let mut sut = HashSetSut::<FxBuildHasher>::new();
+ strs.iter().map(|s| sut.intern(&s)).for_each(drop);
+ });
+ }
+
+ #[bench]
+ fn with_all_new_1000(bench: &mut Bencher) {
+ let strs = gen_strs(1000);
+
+ bench.iter(|| {
+ let sut = ArenaInterner::<FxBuildHasher>::new();
+ strs.iter().map(|s| sut.intern(&s)).for_each(drop);
+ });
+ }
+
+ #[bench]
+ /// This is our baseline with a raw Rc<str>.
+ fn with_one_new_rc_str_1000_baseline(bench: &mut Bencher) {
+ bench.iter(|| {
+ let mut sut: HashSetSut<FxBuildHasher> = HashSetSut {
+ map: HashSet::with_hasher(Default::default()),
+ };
+ (0..1000).map(|_| sut.intern("first")).for_each(drop);
+ });
+ }
+
+ #[bench]
+ fn with_one_new_1000(bench: &mut Bencher) {
+ bench.iter(|| {
+ let sut = ArenaInterner::<FxBuildHasher>::new();
+ (0..1000).map(|_| sut.intern("first")).for_each(drop);
+ });
+ }
+
+ #[bench]
+ fn with_one_new_1000_utf8_unchecked(bench: &mut Bencher) {
+ bench.iter(|| {
+ let sut = ArenaInterner::<FxBuildHasher>::new();
+ (0..1000)
+ .map(|_| unsafe { sut.intern_utf8_unchecked(b"first") })
+ .for_each(drop);
+ });
+ }
+
+ /// Since Fx is the best-performing, let's build upon it to demonstrate
+ /// the benefits of with_capacity
+ #[bench]
+ fn with_all_new_1000_with_capacity(bench: &mut Bencher) {
+ let n = 1000;
+ let strs = gen_strs(n);
+
+ bench.iter(|| {
+ let sut = ArenaInterner::<FxBuildHasher>::with_capacity(n);
+ strs.iter().map(|s| sut.intern(&s)).for_each(drop);
+ });
+ }
+ }
+}
diff --git a/tamer/build-aux/bench_check.rs b/tamer/build-aux/bench_check.rs
new file mode 100644
index 0000000..ac3d8e6
--- /dev/null
+++ b/tamer/build-aux/bench_check.rs
@@ -0,0 +1,23 @@
+// Feature check for `test`
+//
+// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+// As of the time of writing, this feature is unstable and can only be
+// enabled in nightly. This file is intended to be used in the `configure`
+// script to determine whether a nightly version of Rust must be used to
+// invoke benchmarks.
+#![feature(test)]
+
diff --git a/tamer/build-aux/intra_rustdoc_links_check.rs b/tamer/build-aux/intra_rustdoc_links_check.rs
new file mode 100644
index 0000000..d0f59be
--- /dev/null
+++ b/tamer/build-aux/intra_rustdoc_links_check.rs
@@ -0,0 +1,23 @@
+// Feature check for `test`
+//
+// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+// As of the time of writing, this feature is unstable and can only be
+// enabled in nightly. This file is intended to be used in the `configure`
+// script to determine whether a nightly version of Rust must be used to
+// build documentation.
+#![feature(intra_rustdoc_links)]
+
diff --git a/tamer/configure.ac b/tamer/configure.ac
index 9ed3526..06a30fe 100644
--- a/tamer/configure.ac
+++ b/tamer/configure.ac
@@ -43,18 +43,57 @@ AC_CHECK_PROGS(CARGO, [cargo])
test -n "$CARGO" || AC_MSG_ERROR([cargo not found])
-rustc_ver_req=1.39.0
+rustc_ver_req=1.41.0
AC_CHECK_PROGS(RUSTC, [rustc])
AC_MSG_CHECKING([rustc version >= $rustc_ver_req])
rustc_version=$("$RUSTC" --version | cut -d' ' -f2)
AX_COMPARE_VERSION([$rustc_version], [ge], [$rustc_ver_req],
[AC_MSG_RESULT([yes ($rustc_version)])],
- [AC_MSG_ERROR([no ($rustc_version)])])
+ [AC_MSG_RESULT([no ($rustc_version)])
+ AC_MSG_ERROR([If using rustup, run `rustup update'])])
AC_ARG_VAR([CARGO_BUILD_FLAGS],
[Flags to be passed to `cargo build' when invoked via Make])
+# The `intra_rustdoc_links` feature is required for building
+# documentation. If unavailable, then it's still an unstable feature and
+# we'll need to use nightly. We don't check for nightly here, though---if
+# it's missing, then cargo will tell the user what to do.
+AC_MSG_CHECKING([`intra_rustdoc_links_check` feature support])
+AS_IF(["$RUSTC" --crate-type lib build_aux/intra_rustdoc_links_check.rs &>/dev/null],
+ [AC_MSG_RESULT(available)],
+ [AC_MSG_RESULT([no (nightly required)])
+ AC_SUBST([CARGO_DOC_FLAGS], [+nightly])])
+
+# The `test` feature is required for benchmarking. If unavailable, then
+# it's still an unstable feature and we'll need to use nightly. We don't
+# check for nightly here, though---if it's missing, then cargo will tell the
+# user what to do.
+AC_MSG_CHECKING([`test` feature support])
+AS_IF(["$RUSTC" --crate-type lib build_aux/bench_check.rs &>/dev/null],
+ [AC_MSG_RESULT(available)],
+ [AC_MSG_RESULT([no (nightly required)])
+ AC_SUBST([CARGO_BENCH_FLAGS], [+nightly])])
+
+# Cargo commands may be available but not necessarily installed for the
+# active toolchain. Let's check that.
+AC_MSG_CHECKING([whether cargo-fmt is available for active toolchain])
+AS_IF([cargo fmt --help &>/dev/null],
+ [AC_MSG_RESULT(yes)],
+ [AC_MSG_RESULT(no)
+ cargo fmt --help # run again so user can see output
+ AC_MSG_ERROR([missing cargo-fmt for active toolchain])])
+
+# Cargo commands may be available but not necessarily installed for the
+# active toolchain. Let's check that.
+AC_MSG_CHECKING([whether cargo-doc is available for toolchain])
+AS_IF([cargo $CARGO_DOC_FLAGS doc --help &>/dev/null],
+ [AC_MSG_RESULT(yes)],
+ [AC_MSG_RESULT(no)
+ cargo $CARGO_DOC_FLAGS doc --help # run again so user can see output
+ AC_MSG_ERROR([missing cargo-doc for toolchain])])
+
AC_CONFIG_FILES([Makefile])
AC_OUTPUT
diff --git a/tamer/src/bin/tameld.rs b/tamer/src/bin/tameld.rs
index f6767b3..40f51bc 100644
--- a/tamer/src/bin/tameld.rs
+++ b/tamer/src/bin/tameld.rs
@@ -19,19 +19,8 @@
//! utility. Its job is to take each of the compiled object files and
//! produce a final executable.
//!
-//! # Backwards-Compatibility (XSLT System)
-//! This linker is part of the TAMER (TAME in Rust) project, which aims to
-//! incrementally rewrite TAME in Rust. Consequently, it must be able to
-//! serve as a drop-in replacement for the existing (XSLT) linker, which
-//! takes as input `xmlo` files and produces as output an `xmle` file. This
-//! is not efficient, and future versions will begin to migrate away from
-//! this strategy.
-//!
-//! The output `xmle` file is then fed to a `standalone` command which
-//! extracts the JavaScript fragment and places it into its own file. Even
-//! when that is replaced (when this just outputs a final JS file directly),
-//! the `xmle` file is still needed for other purposes, such as `summary`
-//! and `dote` generation.
+//! For more information about the linker,
+//! see the [`tamer::ld`] module.
extern crate tamer;
diff --git a/tamer/src/global.rs b/tamer/src/global.rs
new file mode 100644
index 0000000..dfab6de
--- /dev/null
+++ b/tamer/src/global.rs
@@ -0,0 +1,81 @@
+// Global constants across the entirety of TAMER
+//
+// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+//! System-wide static configuration.
+//!
+//! This module provides a system-wide configuration.
+//! Subsystems should reference these values rather than defining their own
+//! and risk incompatibilities or maintenance issues as requirements
+//! change.
+//!
+//! By convention,
+//! import this entire module rather than individual members and reference
+//! them as `global::foo` to emphasize their nature and risk.
+
+use std::num;
+
+/// A size capable of representing every interned string in a package.
+pub type PkgSymSize = u16;
+
+/// A non-zero equivalent of [`PkgSymSize`];
+pub type NonZeroPkgSymSize = num::NonZeroU16;
+
+/// A size capable of representing every interned string in a program.
+pub type ProgSymSize = u32;
+
+/// A non-zero equivalent of [`ProgSymSize`];
+pub type NonZeroProgSymSize = num::NonZeroU32;
+
+/// A size capable of representing indexes of each individual identifier
+/// within a single package.
+///
+/// Note that,
+/// since TAME is a metalanguage and can easily expand into a great
+/// deal of code,
+/// this must accommodate far more than the user's expectations
+/// working within the provided level of abstraction.
+///
+/// This must be ≥ [`PkgSymSize`].
+pub type PkgIdentSize = u16;
+
+/// A size capable of representing every individual identifier and
+/// expression within a single package.
+///
+/// Note that,
+/// since TAME is a metalanguage and can easily expand into a great
+/// deal of code,
+/// this must accommodate far more than the user's expectations
+/// working within the provided level of abstraction.
+pub type PkgIdentExprSize = u32;
+
+/// A size capable of representing the union of every identifier of every
+/// package used by an entire program.
+///
+/// This must be ≥ [`ProgSymSize`].
+pub type ProgIdentSize = u32;
+
+/// A size capable of representing the union of every identifier and every
+/// expression of every package used by an entire program.
+///
+/// Note that,
+/// since TAME is a metalanguage and can easily expand into a great
+/// deal of code,
+/// this must accommodate far more than the user's expectations
+/// working within the provided level of abstraction.
+///
+/// This must be ≥ [`ProgSymSize`].
+pub type ProgIdentExprSize = u32;
diff --git a/tamer/src/ir/asg/base.rs b/tamer/src/ir/asg/base.rs
new file mode 100644
index 0000000..ed490f7
--- /dev/null
+++ b/tamer/src/ir/asg/base.rs
@@ -0,0 +1,652 @@
+// Concrete ASG
+//
+// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+//! Base concrete [`Asg`] implementation.
+
+use super::graph::{Asg, AsgEdge, AsgError, AsgResult, Node, ObjectRef};
+use super::ident::IdentKind;
+use super::object::{FragmentText, Object, Source};
+use crate::sym::Symbol;
+use fixedbitset::FixedBitSet;
+use petgraph::graph::{
+ DiGraph, EdgeIndex, Graph, IndexType, Neighbors, NodeIndex,
+};
+use petgraph::visit::{GraphBase, IntoNeighbors, Visitable};
+
+/// Concrete ASG.
+///
+/// This implementation is currently based on [`petgraph`].
+///
+/// Identifiers are cached by name for `O(1)` lookup.
+/// Since [`SymbolIndex`][crate::sym::SymbolIndex] is used for this purpose,
+/// the index may contain more entries than nodes and may contain gaps.
+///
+/// For more information,
+/// see [`Asg`].
+pub struct BaseAsg<'i, Ix: IndexType> {
+ /// Directed graph on which objects are stored.
+ graph: DiGraph<Node<'i>, AsgEdge, Ix>,
+
+ /// Map of [`SymbolIndex`][crate::sym::SymbolIndex] to node indexes.
+ ///
+ /// This allows for `O(1)` lookup of identifiers in the graph.
+ /// Note that,
+ /// while we store [`NodeIndex`] internally,
+ /// the public API encapsulates it within an [`ObjectRef`].
+ index: Vec<NodeIndex<Ix>>,
+
+ /// Empty node indicating that no object exists for a given index.
+ empty_node: NodeIndex<Ix>,
+}
+
+impl<'i, Ix> BaseAsg<'i, Ix>
+where
+ Ix: IndexType,
+{
+ /// Create an ASG with the provided initial capacity.
+ ///
+ /// The value for `objects` will be used as the capacity for the nodes
+ /// in the graph,
+ /// as well as the initial index capacity.
+ /// The value for `edges` may be more difficult to consider,
+ /// since edges are used to represent various relationships between
+ /// different types of objects,
+ /// but it's safe to say that each object will have at least one
+ /// edge to another object.
+ ///
+ /// A basic `new` method is not provided to ensure that callers consider
+ /// capacity during construction,
+ /// since graphs can get quite large.
+ pub fn with_capacity(objects: usize, edges: usize) -> Self {
+ let mut graph = Graph::with_capacity(objects, edges);
+ let mut index = Vec::with_capacity(objects);
+
+ // Exhaust the first index to be used as a placeholder.
+ let empty_node = graph.add_node(Some(Object::Empty));
+ index.push(empty_node);
+
+ Self {
+ graph,
+ index,
+ empty_node,
+ }
+ }
+
+ /// Index the provided symbol `name` as representing the identifier `node`.
+ ///
+ /// This index permits `O(1)` identifier lookups.
+ ///
+ /// After an identifier is indexed it is not expected to be reassigned
+ /// to another node.
+ /// Debug builds contain an assertion that will panic in this instance.
+ ///
+ /// Panics
+ /// ======
+ /// Will panic if unable to allocate more space for the index.
+ fn index_identifier(&mut self, name: &'i Symbol<'i>, node: NodeIndex<Ix>) {
+ let i: usize = name.index().into();
+
+ if i >= self.index.len() {
+ // If this is ever a problem we can fall back to usize max and
+ // re-compare before panicing
+ let new_size = (i + 1)
+ .checked_next_power_of_two()
+ .expect("internal error: cannot allocate space for ASG index");
+
+ self.index.resize(new_size, self.empty_node);
+ }
+
+ // We should never overwrite indexes
+ debug_assert!(self.index[i] == self.empty_node);
+
+ self.index[i] = node;
+ }
+
+ /// Lookup `ident` or add an [`Object::Missing`] to the graph and
+ /// return a reference to it.
+ #[inline]
+ fn lookup_or_missing(&mut self, ident: &'i Symbol<'i>) -> ObjectRef<Ix> {
+ self.lookup(ident).unwrap_or_else(|| {
+ let index = self.graph.add_node(Some(Object::Missing(ident)));
+
+ self.index_identifier(ident, index);
+ ObjectRef(index)
+ })
+ }
+}
+
+impl<'i, Ix> Asg<'i, Ix> for BaseAsg<'i, Ix>
+where
+ Ix: IndexType,
+{
+ fn declare(
+ &mut self,
+ name: &'i Symbol<'i>,
+ kind: IdentKind,
+ src: Source<'i>,
+ ) -> AsgResult<ObjectRef<Ix>> {
+ // TODO: src check
+ if let Some(existing) = self.lookup(name) {
+ let node = self.graph.node_weight_mut(existing.0).unwrap();
+
+ match node {
+ Some(Object::Missing(_)) => {
+ node.replace(Object::Ident(name, kind, src));
+ return Ok(existing);
+ }
+ // TODO: no override-override
+ Some(Object::Ident(_, _, orig_src))
+ if orig_src.virtual_ && src.override_ =>
+ {
+ *orig_src = src;
+ return Ok(existing);
+ }
+ // TODO: no override-override
+ Some(Object::IdentFragment(_, _, orig_src, _))
+ if orig_src.virtual_ && src.override_ =>
+ {
+ // clears fragment, which is no longer applicable
+ node.replace(Object::Ident(name, kind, src));
+ return Ok(existing);
+ }
+ _ => return Ok(existing),
+ }
+ }
+
+ let node = self.graph.add_node(Some(Object::Ident(name, kind, src)));
+
+ self.index_identifier(name, node);
+
+ Ok(ObjectRef(node))
+ }
+
+ fn declare_extern(
+ &mut self,
+ name: &'i Symbol<'i>,
+ expected_kind: IdentKind,
+ ) -> AsgResult<ObjectRef<Ix>> {
+ // TODO: resolution!
+ let node = self
+ .graph
+ .add_node(Some(Object::Extern(name, expected_kind)));
+
+ self.index_identifier(name, node);
+
+ Ok(ObjectRef(node))
+ }
+
+ fn set_fragment(
+ &mut self,
+ identi: ObjectRef<Ix>,
+ text: FragmentText,
+ ) -> AsgResult<ObjectRef<Ix>> {
+ // This should _never_ happen as long as you're only using ObjectRef
+ // values produced by these methods.
+ let node = self
+ .graph
+ .node_weight_mut(identi.0)
+ .expect("internal error: BaseAsg::set_fragment bogus identi");
+
+ // This should also never happen, since we immediately repopulate
+ // the node below.
+ let ty = node
+ .take()
+ .expect("internal error: BaseAsg::set_fragment missing Node data");
+
+ let result = match ty {
+ Object::Ident(sym, kind, src) => {
+ Ok(Object::IdentFragment(sym, kind, src, text))
+ }
+ _ => {
+ let err = Err(AsgError::BadFragmentDest(format!(
+ "identifier is not a Object::Ident): {:?}",
+ ty,
+ )));
+
+ node.replace(ty);
+ err
+ }
+ }?;
+
+ node.replace(result);
+
+ Ok(identi)
+ }
+
+ #[inline]
+ fn get<I: Into<ObjectRef<Ix>>>(&self, index: I) -> Option<&Object<'i>> {
+ self.graph.node_weight(index.into().0).map(|node| {
+ node.as_ref()
+ .expect("internal error: BaseAsg::get missing Node data")
+ })
+ }
+
+ #[inline]
+ fn lookup(&self, name: &'i Symbol<'i>) -> Option<ObjectRef<Ix>> {
+ let i: usize = name.index().into();
+
+ self.index
+ .get(i)
+ .filter(|ni| ni.index() > 0)
+ .map(|ni| ObjectRef(*ni))
+ }
+
+ fn add_dep(&mut self, identi: ObjectRef<Ix>, depi: ObjectRef<Ix>) {
+ self.graph.update_edge(identi.0, depi.0, Default::default());
+ }
+
+ #[inline]
+ fn has_dep(&self, ident: ObjectRef<Ix>, dep: ObjectRef<Ix>) -> bool {
+ self.graph.contains_edge(ident.0, dep.0)
+ }
+
+ fn add_dep_lookup(
+ &mut self,
+ ident: &'i Symbol<'i>,
+ dep: &'i Symbol<'i>,
+ ) -> (ObjectRef<Ix>, ObjectRef<Ix>) {
+ let identi = self.lookup_or_missing(ident);
+ let depi = self.lookup_or_missing(dep);
+
+ self.graph.update_edge(identi.0, depi.0, Default::default());
+
+ (identi, depi)
+ }
+}
+
+// TODO: encapsulate Petgraph API (N.B. this is untested!)
+impl<'i, Ix> Visitable for BaseAsg<'i, Ix>
+where
+ Ix: IndexType,
+{
+ type Map = FixedBitSet;
+
+ fn visit_map(&self) -> Self::Map {
+ self.graph.visit_map()
+ }
+
+ fn reset_map(&self, map: &mut Self::Map) {
+ self.graph.reset_map(map)
+ }
+}
+
+impl<'i, Ix> GraphBase for BaseAsg<'i, Ix>
+where
+ Ix: IndexType,
+{
+ type NodeId = NodeIndex<Ix>;
+ type EdgeId = EdgeIndex<Ix>;
+}
+
+impl<'a, 'i, Ix> IntoNeighbors for &'a BaseAsg<'i, Ix>
+where
+ Ix: IndexType,
+{
+ type Neighbors = Neighbors<'a, AsgEdge, Ix>;
+
+ fn neighbors(self, n: Self::NodeId) -> Self::Neighbors {
+ self.graph.neighbors(n)
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+ use crate::sym::SymbolIndex;
+
+ type Sut<'i> = BaseAsg<'i, u8>;
+
+ #[test]
+ fn create_with_capacity() {
+ let node_capacity = 100;
+ let edge_capacity = 300;
+ let sut = Sut::with_capacity(node_capacity, edge_capacity);
+
+ // breaks encapsulation to introspect; the behavior is
+ // transparent to callers (aside from performance
+ // characteristics)
+ let (nc, ec) = sut.graph.capacity();
+ assert!(nc >= node_capacity);
+ assert!(ec >= edge_capacity);
+ assert!(sut.index.capacity() >= node_capacity);
+ }
+
+ #[test]
+ fn declare_new_unique_idents() -> AsgResult<()> {
+ let mut sut = Sut::with_capacity(0, 0);
+
+ // NB: The index ordering is important! We first use a larger
+ // index to create a gap, and then use an index within that gap
+ // to ensure that it's not considered an already-defined
+ // identifier.
+ let syma = Symbol::new_dummy(SymbolIndex::from_u32(5), "syma");
+ let symb = Symbol::new_dummy(SymbolIndex::from_u32(1), "symab");
+
+ let nodea = sut.declare(
+ &syma,
+ IdentKind::Meta,
+ Source {
+ desc: Some("a".to_string()),
+ ..Default::default()
+ },
+ )?;
+
+ let nodeb = sut.declare(
+ &symb,
+ IdentKind::Worksheet,
+ Source {
+ desc: Some("b".to_string()),
+ ..Default::default()
+ },
+ )?;
+
+ assert_ne!(nodea, nodeb);
+
+ assert_eq!(
+ Some(&Object::Ident(
+ &syma,
+ IdentKind::Meta,
+ Source {
+ desc: Some("a".to_string()),
+ ..Default::default()
+ },
+ )),
+ sut.get(nodea),
+ );
+
+ assert_eq!(
+ Some(&Object::Ident(
+ &symb,
+ IdentKind::Worksheet,
+ Source {
+ desc: Some("b".to_string()),
+ ..Default::default()
+ },
+ )),
+ sut.get(nodeb),
+ );
+
+ Ok(())
+ }
+
+ #[test]
+ fn lookup_by_symbol() -> AsgResult<()> {
+ let mut sut = Sut::with_capacity(0, 0);
+
+ let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "lookup");
+ let node = sut.declare(
+ &sym,
+ IdentKind::Meta,
+ Source {
+ generated: true,
+ ..Default::default()
+ },
+ )?;
+
+ assert_eq!(Some(node), sut.lookup(&sym));
+
+ Ok(())
+ }
+
+ #[test]
+ fn declare_extern() -> AsgResult<()> {
+ let mut sut = Sut::with_capacity(0, 0);
+
+ let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "extern");
+ let node = sut.declare_extern(&sym, IdentKind::Meta)?;
+
+ assert_eq!(Some(&Object::Extern(&sym, IdentKind::Meta)), sut.get(node),);
+
+ Ok(())
+ }
+
+ // TODO: incompatible
+ #[test]
+ fn declare_returns_existing_compatible() -> AsgResult<()> {
+ let mut sut = Sut::with_capacity(0, 0);
+
+ let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "symdup");
+ let node = sut.declare(&sym, IdentKind::Meta, Source::default())?;
+
+ // Same declaration a second time
+ let redeclare =
+ sut.declare(&sym, IdentKind::Meta, Source::default())?;
+
+ assert_eq!(node, redeclare);
+ Ok(())
+ }
+
+ // TODO: incompatible
+ #[test]
+ fn declare_override_virtual_ident() -> AsgResult<()> {
+ let mut sut = Sut::with_capacity(0, 0);
+
+ let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "virtual");
+ let over_src = Symbol::new_dummy(SymbolIndex::from_u32(2), "src");
+
+ let virt_node = sut.declare(
+ &sym,
+ IdentKind::Meta,
+ Source {
+ virtual_: true,
+ ..Default::default()
+ },
+ )?;
+
+ let over_src = Source {
+ override_: true,
+ src: Some(&over_src),
+ ..Default::default()
+ };
+
+ let over_node = sut.declare(&sym, IdentKind::Meta, over_src.clone())?;
+
+ assert_eq!(virt_node, over_node);
+
+ assert_eq!(
+ sut.get(over_node),
+ Some(&Object::Ident(&sym, IdentKind::Meta, over_src,))
+ );
+
+ Ok(())
+ }
+
+ // TODO: incompatible
+ #[test]
+ fn declare_override_virtual_ident_fragment() -> AsgResult<()> {
+ let mut sut = Sut::with_capacity(0, 0);
+
+ let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "virtual");
+ let over_src = Symbol::new_dummy(SymbolIndex::from_u32(2), "src");
+
+ let virt_node = sut.declare(
+ &sym,
+ IdentKind::Meta,
+ Source {
+ virtual_: true,
+ ..Default::default()
+ },
+ )?;
+
+ sut.set_fragment(virt_node, FragmentText::from("remove me"))?;
+
+ let over_src = Source {
+ override_: true,
+ src: Some(&over_src),
+ ..Default::default()
+ };
+
+ let over_node = sut.declare(&sym, IdentKind::Meta, over_src.clone())?;
+
+ assert_eq!(virt_node, over_node);
+
+ // The act of overriding the node should have cleared any existing
+ // fragment, making way for a new fragment to take its place as soon
+ // as it is discovered. (So, back to an Object::Ident.)
+ assert_eq!(
+ sut.get(over_node),
+ Some(&Object::Ident(&sym, IdentKind::Meta, over_src,))
+ );
+
+ Ok(())
+ }
+
+ #[test]
+ fn add_fragment_to_ident() -> AsgResult<()> {
+ let mut sut = Sut::with_capacity(0, 0);
+
+ let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "tofrag");
+ let src = Source {
+ generated: true,
+ ..Default::default()
+ };
+ let node = sut.declare(&sym, IdentKind::Meta, src.clone())?;
+
+ let fragment = "a fragment".to_string();
+ let node_with_frag = sut.set_fragment(node, fragment.clone())?;
+
+ // Attaching a fragment should _replace_ the node, not create a
+ // new one
+ assert_eq!(
+ node, node_with_frag,
+ "fragment node does not match original node"
+ );
+
+ assert_eq!(
+ Some(&Object::IdentFragment(&sym, IdentKind::Meta, src, fragment)),
+ sut.get(node)
+ );
+
+ Ok(())
+ }
+
+ #[test]
+ fn add_fragment_to_fragment_fails() -> AsgResult<()> {
+ let mut sut = Sut::with_capacity(0, 0);
+
+ let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "sym");
+ let node = sut.declare(&sym, IdentKind::Meta, Source::default())?;
+ let fragment = "orig fragment".to_string();
+
+ sut.set_fragment(node, fragment.clone())?;
+
+ // Since it's already a fragment, this should fail.
+ let err = sut
+ .set_fragment(node, "replacement".to_string())
+ .expect_err("Expected failure");
+
+ match err {
+ AsgError::BadFragmentDest(str) if str.contains("sym") => (),
+ _ => panic!("expected AsgError::BadFragmentDest: {:?}", err),
+ }
+
+ // Make sure we didn't leave the node in an inconsistent state
+ assert_eq!(
+ Some(&Object::IdentFragment(
+ &sym,
+ IdentKind::Meta,
+ Default::default(),
+ fragment
+ )),
+ sut.get(node)
+ );
+
+ Ok(())
+ }
+
+ #[test]
+ fn add_ident_dep_to_ident() -> AsgResult<()> {
+ let mut sut = Sut::with_capacity(0, 0);
+
+ let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "sym");
+ let dep = Symbol::new_dummy(SymbolIndex::from_u32(1), "dep");
+
+ let symnode = sut.declare(&sym, IdentKind::Meta, Source::default())?;
+ let depnode = sut.declare(&dep, IdentKind::Meta, Source::default())?;
+
+ sut.add_dep(symnode, depnode);
+ assert!(sut.has_dep(symnode, depnode));
+
+ // sanity check if we re-add a dep
+ sut.add_dep(symnode, depnode);
+ assert!(sut.has_dep(symnode, depnode));
+
+ Ok(())
+ }
+
+ // same as above test
+ #[test]
+ fn add_dep_lookup_existing() -> AsgResult<()> {
+ let mut sut = Sut::with_capacity(0, 0);
+
+ let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "sym");
+ let dep = Symbol::new_dummy(SymbolIndex::from_u32(2), "dep");
+
+ let _ = sut.declare(&sym, IdentKind::Meta, Source::default())?;
+ let _ = sut.declare(&dep, IdentKind::Meta, Source::default())?;
+
+ let (symnode, depnode) = sut.add_dep_lookup(&sym, &dep);
+ assert!(sut.has_dep(symnode, depnode));
+
+ Ok(())
+ }
+
+ #[test]
+ fn add_dep_lookup_missing() -> AsgResult<()> {
+ let mut sut = Sut::with_capacity(0, 0);
+
+ let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "sym");
+ let dep = Symbol::new_dummy(SymbolIndex::from_u32(2), "dep");
+
+ // both of these are missing
+ let (symnode, depnode) = sut.add_dep_lookup(&sym, &dep);
+ assert!(sut.has_dep(symnode, depnode));
+
+ assert_eq!(Some(&Object::Missing(&sym)), sut.get(symnode));
+ assert_eq!(Some(&Object::Missing(&dep)), sut.get(depnode));
+
+ Ok(())
+ }
+
+ #[test]
+ fn declare_return_missing_symbol() -> AsgResult<()> {
+ let mut sut = Sut::with_capacity(0, 0);
+
+ let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "sym");
+ let dep = Symbol::new_dummy(SymbolIndex::from_u32(2), "dep");
+
+ // both of these are missing, see add_dep_lookup_missing
+ let (symnode, _) = sut.add_dep_lookup(&sym, &dep);
+
+ let src = Source {
+ desc: Some("Tamer is NOT lamer.".to_string()),
+ ..Default::default()
+ };
+
+ // Check with a declared value
+ let declared = sut.declare(&sym, IdentKind::Meta, src.clone())?;
+
+ assert_eq!(symnode, declared);
+
+ assert_eq!(
+ Some(&Object::Ident(&sym, IdentKind::Meta, src)),
+ sut.get(declared),
+ );
+
+ Ok(())
+ }
+}
diff --git a/tamer/src/ir/asg/graph.rs b/tamer/src/ir/asg/graph.rs
new file mode 100644
index 0000000..e2135e4
--- /dev/null
+++ b/tamer/src/ir/asg/graph.rs
@@ -0,0 +1,249 @@
+// Graph abstraction
+//
+// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+//! Abstract graph as the basis for concrete ASGs.
+
+use super::ident::IdentKind;
+use super::object::{FragmentText, Object, Source};
+use crate::sym::Symbol;
+use petgraph::graph::{IndexType, NodeIndex};
+use std::result::Result;
+
+/// An abstract semantic graph of [objects][Object].
+///
+/// This IR focuses on the definition and manipulation of objects and their
+/// dependencies.
+/// See [`Object`] for a summary of valid object state transitions.
+///
+/// Objects are never deleted from the graph,
+/// so [`ObjectRef`]s will remain valid for the lifetime of the ASG.
+///
+/// For more information,
+/// see the [module-level documentation][self].
+pub trait Asg<'i, Ix: IndexType> {
+ /// Declare a concrete identifier.
+ ///
+ /// An identifier declaration is similar to a declaration in a header
+ /// file in a language like C,
+ /// describing the structure of the identifier.
+ /// Once declared,
+ /// this information cannot be changed.
+ ///
+ /// Identifiers are uniquely identified by a [`Symbol`] `name`.
+ /// If an identifier of the same `name` already exists,
+ /// then the provided declaration is compared against the existing
+ /// declaration---should
+ /// they be incompatible,
+ /// then the operation will fail;
+ /// otherwise,
+ /// the existing identifier will be returned.
+ /// A successful declaration will add a [`Object::Ident`] to the graph
+ /// and return an [`ObjectRef`] reference.
+ ///
+ /// If an existing identifier is an extern (see
+ /// [`Asg::declare_extern`]),
+ /// then the declaration will be compared just the same,
+ /// but the identifier will be converted from a
+ /// [`Object::Extern`] into a [`Object::Ident`].
+ /// When this happens,
+ /// the extern is said to be _resolved_.
+ ///
+ /// If a virtual identifier of type [`Object::IdentFragment`] is
+ /// overridden,
+ /// then its fragment is cleared
+ /// (it returns to a [`Object::Ident`])
+ /// to make way for the fragment of the override.
+ fn declare(
+ &mut self,
+ name: &'i Symbol<'i>,
+ kind: IdentKind,
+ src: Source<'i>,
+ ) -> AsgResult<ObjectRef<Ix>>;
+
+ /// Declare an abstract identifier.
+ ///
+ /// An _extern_ declaration declares an identifier the same as
+ /// [`Asg::declare`],
+ /// but instead as [`Object::Extern`].
+ /// Externs are identifiers that are expected to be defined somewhere
+ /// else ("externally"),
+ /// and are resolved at [link-time][crate::ld].
+ ///
+ /// If a concrete identifier has already been declared (see
+ /// [`Asg::declare`]),
+ /// then the declarations will be compared and,
+ /// if compatible,
+ /// the identifier will be immediately _resolved_ and the object
+ /// on the graph will not be altered.
+ /// Resolution will otherwise fail in error.
+ fn declare_extern(
+ &mut self,
+ name: &'i Symbol<'i>,
+ expected_kind: IdentKind,
+ ) -> AsgResult<ObjectRef<Ix>>;
+
+ /// Set the fragment associated with a concrete identifier.
+ ///
+ /// This changes the type of the identifier from [`Object::Ident`]
+ /// into [`Object::IdentFragment`],
+ /// which is intended for use by the [linker][crate::ld].
+ fn set_fragment(
+ &mut self,
+ identi: ObjectRef<Ix>,
+ text: FragmentText,
+ ) -> AsgResult<ObjectRef<Ix>>;
+
+ /// Retrieve an object from the graph by [`ObjectRef`].
+ ///
+ /// Since an [`ObjectRef`] should only be produced by an [`Asg`],
+ /// and since objects are never deleted from the graph,
+ /// this should never fail so long as references are not shared
+ /// between multiple graphs.
+ /// It is nevertheless wrapped in an [`Option`] just in case.
+ fn get<I: Into<ObjectRef<Ix>>>(&self, index: I) -> Option<&Object<'i>>;
+
+ /// Attempt to retrieve an identifier from the graph by name.
+ ///
+ /// Since only identifiers carry a name,
+ /// this method cannot be used to retrieve all possible objects on the
+ /// graph---for
+ /// that, see [`Asg::get`].
+ fn lookup(&self, name: &'i Symbol<'i>) -> Option<ObjectRef<Ix>>;
+
+ /// Declare that `dep` is a dependency of `ident`.
+ ///
+ /// An object must be declared as a dependency if its value must be
+ /// computed before computing the value of `ident`.
+ /// The [linker][crate::ld] will ensure this ordering.
+ ///
+ /// See [`add_dep_lookup`][Asg::add_dep_lookup] if identifiers have to
+ /// be looked up by [`Symbol`] or if they may not yet have been
+ /// declared.
+ fn add_dep(&mut self, ident: ObjectRef<Ix>, dep: ObjectRef<Ix>);
+
+ /// Check whether `dep` is a dependency of `ident`.
+ fn has_dep(&self, ident: ObjectRef<Ix>, dep: ObjectRef<Ix>) -> bool;
+
+ /// Declare that `dep` is a dependency of `ident`,
+ /// regardless of whether they are known.
+ ///
+ /// In contrast to [`add_dep`][Asg::add_dep],
+ /// this method will add the dependency even if one or both of `ident`
+ /// or `dep` have not yet been declared.
+ /// In such a case,
+ /// an [`Object::Missing`] will be added as a placeholder for the
+ /// missing identifier,
+ /// allowing the ASG to be built with partial information as
+ /// identifiers continue to be discovered.
+ ///
+ /// References to both identifiers are returned in argument order.
+ fn add_dep_lookup(
+ &mut self,
+ ident: &'i Symbol<'i>,
+ dep: &'i Symbol<'i>,
+ ) -> (ObjectRef<Ix>, ObjectRef<Ix>);
+}
+
+/// A [`Result`] with a hard-coded [`AsgError`] error type.
+///
+/// This is the result of every [`Asg`] operation that could potentially
+/// fail in error.
+pub type AsgResult<T> = Result<T, AsgError>;
+
+/// Reference to an [object][Object] stored within the [`Asg`].
+///
+/// Object references are integer offsets,
+/// not pointers.
+/// See the [module-level documentation][self] for more information.
+#[derive(Debug, Copy, Clone, Default, PartialEq, Eq)]
+pub struct ObjectRef<Ix>(pub NodeIndex<Ix>);
+
+impl<Ix> From<NodeIndex<Ix>> for ObjectRef<Ix>
+where
+ Ix: IndexType,
+{
+ fn from(index: NodeIndex<Ix>) -> Self {
+ Self(index)
+ }
+}
+
+impl<Ix> From<ObjectRef<Ix>> for NodeIndex<Ix>
+where
+ Ix: IndexType,
+{
+ fn from(objref: ObjectRef<Ix>) -> Self {
+ objref.0
+ }
+}
+
+/// There are currently no data stored on edges ("edge weights").
+pub type AsgEdge = ();
+
+/// Each node of the graph represents an object.
+///
+/// Enclosed in an [`Option`] to permit moving owned values out of the
+/// graph.
+pub type Node<'i> = Option<Object<'i>>;
+
+/// An error from an ASG operation.
+///
+/// Storing [`Symbol`] would require that this have a lifetime,
+/// which is very inconvenient when chaining [`Result`],
+/// so this stores only owned values.
+/// The caller will know the problem values.
+#[derive(Debug, PartialEq)]
+pub enum AsgError {
+ /// The provided identifier is not in a state that is permitted to
+ /// receive a fragment.
+ ///
+ /// See [`Asg::set_fragment`] for more information.
+ BadFragmentDest(String),
+}
+
+impl std::fmt::Display for AsgError {
+ fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
+ match self {
+ Self::BadFragmentDest(msg) => {
+ write!(fmt, "bad fragment destination: {}", msg)
+ }
+ }
+ }
+}
+
+impl std::error::Error for AsgError {
+ fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+ None
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+
+ mod objref {
+ use super::*;
+
+ #[test]
+ fn to_from_nodeindex() {
+ let index = NodeIndex::<u32>::new(5);
+ let objref: ObjectRef<u32> = ObjectRef::from(index);
+
+ assert_eq!(index, objref.0);
+ assert_eq!(index, objref.into());
+ }
+ }
+}
diff --git a/tamer/src/ir/asg/ident.rs b/tamer/src/ir/asg/ident.rs
new file mode 100644
index 0000000..a8291ae
--- /dev/null
+++ b/tamer/src/ir/asg/ident.rs
@@ -0,0 +1,376 @@
+// ASG identifiers
+//
+// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+//! Identifiers (a type of [object][super::object::Object]).
+
+use crate::ir::legacyir::{SymAttrs, SymDtype, SymType};
+use std::convert::TryFrom;
+
+/// Types of identifiers.
+///
+/// Here, the term _calculation_ refers to a composable expression that
+/// produces a numeric result.
+///
+/// These are derived from [`legacyir::SymType`][crate::ir::legacyir::SymType]
+/// and will be generalized in the future.
+#[derive(Debug, PartialEq, Eq)]
+pub enum IdentKind {
+ /// Classification generator.
+ ///
+ /// This has the same number of dimensions as its highest-dimension
+ /// predicate.
+ /// Every [`Class`][IdentKind::Class] has an associated generator.
+ Cgen(Dim),
+
+ /// Boolean classification.
+ ///
+ /// This is an artifact of an ancient system.
+ /// The dimensions here refers to the dimensions of the associated
+ /// [`Cgen`][IdentKind::Cgen].
+ Class(Dim),
+
+ /// Constant value.
+ Const(Dim, DataType),
+
+ /// Re-usable encapsulated expression.
+ ///
+ /// Functions are nothing more than expressions that can be re-used with
+ /// dynamic values at runtime.
+ /// See also [`Lparam`][IdentKind::Lparam].
+ Func(Dim, DataType),
+
+ /// Generating calculation.
+ ///
+ /// Generators are associated with iterative expressions,
+ /// such as sums and products.
+ /// They always have a parent [`Rate`][IdentKind::Rate].
+ Gen(Dim, DataType),
+
+ /// Local (non-global) parameter.
+ ///
+ /// Local parameters are lexically scoped to their parent expression:
+ /// - [`Func`][IdentKind::Func], where there exists one per defined
+ /// function parameter; and
+ /// - `let` expression bindings.
+ ///
+ /// This is not to be confused with the global
+ /// [`Param`][IdentKind::Param].
+ Lparam(Dim, DataType),
+
+ /// Global parameter.
+ ///
+ /// These parameters serve as inputs to the system.
+ /// Input values are bound using [`Map`][IdentKind::Map].
+ Param(Dim, DataType),
+
+ /// Scalar result of a named calculation.
+ ///
+ /// The verb "rate" is historical,
+ /// since TAME was developed for insurance rating systems.
+ /// This represents a named expression that yields a scalar value.
+ ///
+ /// This serves as a parent to [`Gen`][IdentKind::Gen].
+ Rate(DataType),
+
+ /// Template definition.
+ ///
+ /// A template is used only at expansion-time and,
+ /// unlike most other things in the system,
+ /// have no runtime value.
+ Tpl,
+
+ /// User-defined data type.
+ ///
+ /// The only types typically defined are enums and unions of enums.
+ /// The type itself has no runtime value,
+ /// but each of the enum variants have an associated value of type
+ /// [`DataType`].
+ Type(DataType),
+
+ /// Input map head (meta identifier generated by compiler for each input
+ /// map).
+ MapHead,
+
+ /// Input field→param mapping.
+ ///
+ /// These may only map to [`Param`][IdentKind::Param].
+ /// The source data is arbitrary and provided at runtime.
+ Map,
+
+ /// Input map tail (meta symbol generated by compiler for each input
+ /// map).
+ MapTail,
+
+ /// Return map head (meta symbol generated by compiler for each return
+ /// map).
+ RetMapHead,
+
+ /// Return param→field mapping.
+ ///
+ /// Return mappings export data to calling systems.
+ /// They can map back any globally defined numeric expression.
+ RetMap,
+
+ /// Return map tail (meta symbol generated by compiler for each return
+ /// map).
+ RetMapTail,
+
+ /// Arbitrary metadata.
+ ///
+ /// This permits the definition of static key/value data that is
+ /// compiled into the final executable.
+ Meta,
+
+ /// Rating worksheet (generated by compiler for worksheet packages).
+ ///
+ /// The worksheet exposes intermediate calculation values in a much more
+ /// concise form than that of the Summary Page.
+ Worksheet,
+}
+
+impl<'i> TryFrom<SymAttrs<'i>> for IdentKind {
+ type Error = &'static str;
+
+ /// Attempt to raise [`SymAttrs`] into an [`IdentKind`].
+ ///
+ /// Certain [`IdentKind`] require that certain attributes be present,
+ /// otherwise the conversion will fail.
+ fn try_from(attrs: SymAttrs<'i>) -> Result<Self, Self::Error> {
+ Self::try_from(&attrs)
+ }
+}
+
+impl<'i> TryFrom<&SymAttrs<'i>> for IdentKind {
+ type Error = &'static str;
+
+ /// Attempt to raise [`SymAttrs`] into an [`IdentKind`].
+ ///
+ /// Certain [`IdentKind`] require that certain attributes be present,
+ /// otherwise the conversion will fail.
+ fn try_from(attrs: &SymAttrs<'i>) -> Result<Self, Self::Error> {
+ let ty = attrs.ty.as_ref().ok_or("missing symbol type")?;
+
+ macro_rules! ident {
+ ($to:expr) => {
+ Ok($to)
+ };
+ ($to:expr, dim) => {
+ Ok($to(Dim(attrs.dim.ok_or("missing dim")?)))
+ };
+ ($to:expr, dtype) => {
+ Ok($to(attrs.dtype.ok_or("missing dtype")?))
+ };
+ ($to:expr, dim, dtype) => {
+ Ok($to(
+ Dim(attrs.dim.ok_or("missing dim")?),
+ attrs.dtype.ok_or("missing dtype")?,
+ ))
+ };
+ }
+
+ match ty {
+ SymType::Cgen => ident!(Self::Cgen, dim),
+ SymType::Class => ident!(Self::Class, dim),
+ SymType::Const => ident!(Self::Const, dim, dtype),
+ SymType::Func => ident!(Self::Func, dim, dtype),
+ SymType::Gen => ident!(Self::Gen, dim, dtype),
+ SymType::Lparam => ident!(IdentKind::Lparam, dim, dtype),
+ SymType::Param => ident!(IdentKind::Param, dim, dtype),
+ SymType::Rate => ident!(IdentKind::Rate, dtype),
+ SymType::Tpl => ident!(IdentKind::Tpl),
+ SymType::Type => ident!(IdentKind::Type, dtype),
+ SymType::MapHead => ident!(IdentKind::MapHead),
+ SymType::Map => ident!(IdentKind::Map),
+ SymType::MapTail => ident!(IdentKind::MapTail),
+ SymType::RetMapHead => ident!(IdentKind::RetMapHead),
+ SymType::RetMap => ident!(IdentKind::RetMap),
+ SymType::RetMapTail => ident!(IdentKind::RetMapTail),
+ SymType::Meta => ident!(IdentKind::Meta),
+ SymType::Worksheet => ident!(IdentKind::Worksheet),
+ }
+ }
+}
+
+/// Identifier dimensions.
+///
+/// This determines the number of subscripts needed to access a scalar
+/// value.
+/// A value of `0` indicates a scalar;
+/// a value of `1` indicates a vector;
+/// a value of `2` indicates a matrix;
+/// and a value of `n` indicates a multi-dimensional array of
+/// depth `n`.
+#[derive(Debug, PartialEq, Eq, Clone, Copy, Default)]
+pub struct Dim(u8);
+
+/// Underlying datatype of identifier.
+///
+/// TODO: This will always be 0≤n≤9, so let's introduce a newtype for it.
+impl AsRef<str> for Dim {
+ fn as_ref(&self) -> &str {
+ match self.0 {
+ 0 => &"0",
+ 1 => &"1",
+ 2 => &"2",
+ 3 => &"3",
+ 4 => &"4",
+ 5 => &"5",
+ 6 => &"6",
+ 7 => &"7",
+ 8 => &"8",
+ 9 => &"9",
+ _ => unreachable!(),
+ }
+ }
+}
+
+/// Underlying datatype of identifier.
+pub type DataType = SymDtype;
+
+#[cfg(test)]
+mod test {
+ use super::*;
+ use std::convert::TryInto;
+
+ #[test]
+ fn dim_to_str() {
+ // we'll just test high and low
+ let low: &str = Dim(0).as_ref();
+ let high: &str = Dim(9).as_ref();
+
+ assert_eq!("0", low);
+ assert_eq!("9", high);
+ }
+
+ macro_rules! test_kind {
+ ($name:ident, $src:expr => $dest:expr) => {
+ #[test]
+ fn $name() {
+ assert_eq!(
+ Ok($dest),
+ SymAttrs {
+ ty: Some($src),
+ ..Default::default()
+ }
+ .try_into()
+ );
+ }
+ };
+
+ ($name:ident, $src:expr => $dest:expr, dim) => {
+ #[test]
+ fn $name() {
+ let dim = 1;
+
+ assert_eq!(
+ Ok($dest(Dim(dim))),
+ SymAttrs {
+ ty: Some($src),
+ dim: Some(dim),
+ ..Default::default()
+ }
+ .try_into()
+ );
+
+ // no dim
+ IdentKind::try_from(SymAttrs {
+ ty: Some($src),
+ ..Default::default()
+ })
+ .expect_err("must fail when missing dim");
+ }
+ };
+
+ ($name:ident, $src:expr => $dest:expr, dtype) => {
+ #[test]
+ fn $name() {
+ let dtype = SymDtype::Float;
+
+ assert_eq!(
+ Ok($dest(dtype)),
+ SymAttrs {
+ ty: Some($src),
+ dtype: Some(dtype),
+ ..Default::default()
+ }
+ .try_into()
+ );
+
+ // no dtype
+ IdentKind::try_from(SymAttrs {
+ ty: Some($src),
+ ..Default::default()
+ })
+ .expect_err("must fail when missing dtype");
+ }
+ };
+
+ ($name:ident, $src:expr => $dest:expr, dim, dtype) => {
+ #[test]
+ fn $name() {
+ let dim = 1;
+ let dtype = SymDtype::Float;
+
+ assert_eq!(
+ Ok($dest(Dim(dim), dtype)),
+ SymAttrs {
+ ty: Some($src),
+ dim: Some(dim),
+ dtype: Some(dtype),
+ ..Default::default()
+ }
+ .try_into()
+ );
+
+ // no dim
+ IdentKind::try_from(SymAttrs {
+ ty: Some($src),
+ dtype: Some(dtype),
+ ..Default::default()
+ })
+ .expect_err("must fail when missing dim");
+
+ // no dtype
+ IdentKind::try_from(SymAttrs {
+ ty: Some($src),
+ dim: Some(dim),
+ ..Default::default()
+ })
+ .expect_err("must fail when missing dtype");
+ }
+ };
+ }
+
+ test_kind!(cgen, SymType::Cgen => IdentKind::Cgen, dim);
+ test_kind!(class, SymType::Class => IdentKind::Class, dim);
+ test_kind!(r#const, SymType::Const => IdentKind::Const, dim, dtype);
+ test_kind!(func, SymType::Func => IdentKind::Func, dim, dtype);
+ test_kind!(gen, SymType::Gen => IdentKind::Gen, dim, dtype);
+ test_kind!(lparam, SymType::Lparam => IdentKind::Lparam, dim, dtype);
+ test_kind!(param, SymType::Param => IdentKind::Param, dim, dtype);
+ test_kind!(rate, SymType::Rate => IdentKind::Rate, dtype);
+ test_kind!(tpl, SymType::Tpl => IdentKind::Tpl);
+ test_kind!(r#type, SymType::Type => IdentKind::Type, dtype);
+ test_kind!(maphead, SymType::MapHead => IdentKind::MapHead);
+ test_kind!(map, SymType::Map => IdentKind::Map);
+ test_kind!(maptail, SymType::MapTail => IdentKind::MapTail);
+ test_kind!(retmaphead, SymType::RetMapHead => IdentKind::RetMapHead);
+ test_kind!(retmap, SymType::RetMap => IdentKind::RetMap);
+ test_kind!(retmaptail, SymType::RetMapTail => IdentKind::RetMapTail);
+ test_kind!(meta, SymType::Meta => IdentKind::Meta);
+ test_kind!(worksheet, SymType::Worksheet => IdentKind::Worksheet);
+}
diff --git a/tamer/src/ir/asg/mod.rs b/tamer/src/ir/asg/mod.rs
new file mode 100644
index 0000000..0e49eb2
--- /dev/null
+++ b/tamer/src/ir/asg/mod.rs
@@ -0,0 +1,192 @@
+// Abstract semantic graph (ASG) intermediate representation (IR)
+//
+// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+//! Abstract semantic graph.
+//!
+//! The [abstract semantic graph][asg] (ASG) is an IR representing the
+//! relationship between objects using a directed [graph][].
+//! An _object_ is an identifier or expression.
+//!
+//! Since TAME is a declarative language,
+//! the ASG does not represent control flow;
+//! instead, it represents the relationship between objects and their
+//! dependencies.
+//! Control flow is determined solely by the [linker][crate::ld] based on
+//! these dependencies.
+//!
+//! See [`crate::global`] for available index sizes depending on context.
+//! For example,
+//! a linker may choose to use [`crate::global::ProgIdentSize`];
+//!
+//!
+//! Graph Structure
+//! ===============
+//! Each node (vector) in the graph represents an [object][Object],
+//! such as an identifier or an expression.
+//! Each directed edge `(A->B)` represents that `A` depends upon `B`.
+//!
+//! Graphs may contain cycles for recursive functions—that is,
+//! TAME's ASG is _not_ a DAG.
+//! Mutually recursive functions are therefore represented as
+//! [strongly connected components][scc].
+//!
+//! [asg]: https://en.wikipedia.org/wiki/Abstract_semantic_graph
+//! [graph]: https://en.wikipedia.org/wiki/Graph_(discrete_mathematics)
+//! [scc]: https://en.wikipedia.org/wiki/Strongly_connected_component
+//!
+//! Each object may have a number of valid states;
+//! see [`Object`] for valid object states and transitions.
+//!
+//!
+//! How To Use
+//! ==========
+//! A suitable concrete [`Asg`] implementation is provided by
+//! [`DefaultAsg`].
+//!
+//! ```
+//! use tamer::global;
+//! use tamer::ir::asg::{Asg, DefaultAsg, IdentKind, Object, Source};
+//! use tamer::sym::{Interner, DefaultInterner};
+//!
+//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
+//! // Be sure to choose size and initial capacities appropriate for your
+//! // situation.
+//! let mut asg = DefaultAsg::<global::PkgIdentSize>::with_capacity(1024, 1024);
+//!
+//! let interner = DefaultInterner::new();
+//! let identa_sym = interner.intern("identa");
+//! let identb_sym = interner.intern("identb");
+//!
+//! let identa = asg.declare(identa_sym, IdentKind::Meta, Source::default())?;
+//! let identb = asg.declare_extern(identb_sym, IdentKind::Meta)?;
+//!
+//! assert_eq!(
+//! Some(&Object::Extern(identb_sym, IdentKind::Meta)),
+//! asg.get(identb),
+//! );
+//!
+//! // Dependencies can be declared even if an identifier is
+//! // unresolved. This declares `(identa)->(identb)`.
+//! asg.add_dep(identa, identb);
+//! assert!(asg.has_dep(identa, identb));
+//!
+//! // TODO: extern resolution
+//!
+//! // Identifiers are indexed by symbol name.
+//! assert_eq!(Some(identa), asg.lookup(identa_sym));
+//! #
+//! # Ok(()) // main
+//! # }
+//! ```
+//!
+//! Missing Identifiers
+//! -------------------
+//! Since identifiers in TAME can be defined in any order relative to their
+//! dependencies within a source file,
+//! it is often the case that a dependency will have to be added to the
+//! graph before it is resolved.
+//! For example,
+//! [`Asg::add_dep_lookup`] will add an [`Object::Missing`] to the graph
+//! if either identifier has not yet been declared.
+//!
+//! ```
+//! # use tamer::global;
+//! # use tamer::ir::asg::{Asg, DefaultAsg, IdentKind, Object, FragmentText, Source};
+//! # use tamer::sym::{Interner, DefaultInterner};
+//! #
+//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
+//! # let mut asg = DefaultAsg::<global::PkgIdentSize>::with_capacity(1024, 1024);
+//! # let interner = DefaultInterner::new();
+//! #
+//! let identa_sym = interner.intern("identa");
+//! let identb_sym = interner.intern("identb");
+//! let (identa, identb) = asg.add_dep_lookup(identa_sym, identb_sym);
+//!
+//! assert_eq!(Some(&Object::Missing(identa_sym)), asg.get(identa));
+//! assert_eq!(Some(&Object::Missing(identb_sym)), asg.get(identb));
+//!
+//! // The identifiers returned above are proper objects on the graph.
+//! assert_eq!(Some(identa), asg.lookup(identa_sym));
+//! assert_eq!(Some(identb), asg.lookup(identb_sym));
+//!
+//! // Once declared, the missing identifier changes state and dependencies
+//! // are retained.
+//! asg.declare(identa_sym, IdentKind::Meta, Source::default())?;
+//!
+//! assert_eq!(
+//! Some(&Object::Ident(identa_sym, IdentKind::Meta, Source::default())),
+//! asg.get(identa),
+//! );
+//!
+//! assert!(asg.has_dep(identa, identb));
+//! #
+//! # Ok(()) // main
+//! # }
+//! ```
+//!
+//! Fragments
+//! ---------
+//! A compiled fragment can be attached to any resolved identifier (see
+//! [`Object::Ident`]) using [`Asg::set_fragment`].
+//! Doing so changes the state of the identifier to [`Object::IdentFragment`],
+//! and it is an error to attempt to overwrite that fragment once it is
+//! set.
+//!
+//! ```
+//! # use tamer::global;
+//! # use tamer::ir::asg::{Asg, DefaultAsg, IdentKind, Object, FragmentText, Source};
+//! # use tamer::sym::{Interner, DefaultInterner};
+//! #
+//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
+//! # let mut asg = DefaultAsg::<global::PkgIdentSize>::with_capacity(1024, 1024);
+//! # let interner = DefaultInterner::new();
+//! #
+//! // Fragments can be attached to resolved identifiers.
+//! let ident = asg.declare(
+//! interner.intern("ident"), IdentKind::Meta, Source::default()
+//! )?;
+//! asg.set_fragment(ident, FragmentText::from("test fragment"))?;
+//!
+//! assert_eq!(
+//! Some(&Object::IdentFragment(
+//! interner.intern("ident"),
+//! IdentKind::Meta,
+//! Source::default(),
+//! FragmentText::from("test fragment"),
+//! )),
+//! asg.get(ident),
+//! );
+//!
+//! // But overwriting will fail
+//! let bad = asg.set_fragment(ident, FragmentText::from("overwrite"));
+//! assert!(bad.is_err());
+//! #
+//! # Ok(()) // main
+//! # }
+//! ```
+
+mod base;
+mod graph;
+mod ident;
+mod object;
+
+pub use graph::{Asg, AsgResult, ObjectRef};
+pub use ident::{Dim, IdentKind};
+pub use object::{FragmentText, Object, Source};
+
+/// Default concrete ASG implementation.
+pub type DefaultAsg<'i, Ix> = base::BaseAsg<'i, Ix>;
diff --git a/tamer/src/ir/asg/object.rs b/tamer/src/ir/asg/object.rs
new file mode 100644
index 0000000..282834a
--- /dev/null
+++ b/tamer/src/ir/asg/object.rs
@@ -0,0 +1,230 @@
+// Objects represented on ASG
+//
+// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+//! Objects represented by the ASG.
+//!
+//! _This is a private module.
+//! See [`super`] for available exports._
+
+use super::ident::IdentKind;
+use crate::ir::legacyir::SymAttrs;
+use crate::sym::Symbol;
+
+/// Type of object.
+///
+/// These types represent object states:
+///
+/// ```text
+/// ,-> (Missing) -------.
+/// / \ \
+/// / v v
+/// ((Empty)) -> (Extern) -> ((Ident)) -> ((IdentFragment)).
+/// \ ^ /
+/// \ / \ /
+/// `--------------------` `-----------'
+/// ```
+///
+/// The [`Empty`][Object::Empty] state is never directly accessable
+/// through [`Asg`][super::Asg]'s public API,
+/// as it represents the _absence_ of an object at that node within the
+/// ASG.
+#[derive(Debug, PartialEq)]
+pub enum Object<'i> {
+ /// An identifier is expected to be defined but is not yet available.
+ ///
+ /// This variant contains the symbol representing the name of the
+ /// expected identifier.
+ /// By defining an object as missing,
+ /// this allows the graph to be built incrementally as objects are
+ /// discovered.
+ ///
+ /// Note that this is different than [`Empty`][Object::Empty].
+ Missing(&'i Symbol<'i>),
+
+ /// A resolved identifier.
+ ///
+ /// This represents an identifier that has been declared with certain
+ /// type information.
+ Ident(&'i Symbol<'i>, IdentKind, Source<'i>),
+
+ /// An identifier that has not yet been resolved.
+ ///
+ /// Externs are upgraded to [`Object::Ident`] once an identifier of
+ /// the same name is loaded.
+ /// It is an error if the loaded identifier does not have a compatible
+ /// [`IdentKind`].
+ Extern(&'i Symbol<'i>, IdentKind),
+
+ /// Identifier with associated text.
+ ///
+ /// Code fragments are portions of the target language associated with
+ /// an identifier.
+ /// They are produced by the compiler and it is the job of the
+ /// [linker][crate::ld] to put them into the correct order for the
+ /// final executable.
+ IdentFragment(&'i Symbol<'i>, IdentKind, Source<'i>, FragmentText),
+
+ /// The empty node (default value for indexer).
+ ///
+ /// This is not a valid state accessible via [`Asg`][super::Asg].
+ ///
+ /// Note that this is different than [`Missing`][Object::Missing].
+ Empty,
+}
+
+/// Compiled fragment for identifier.
+///
+/// This represents the text associated with an identifier.
+pub type FragmentText = String;
+
+/// Metadata about the source of an object.
+///
+/// This contains information from the symbol table that does not belong on
+/// [`IdentKind`],
+/// since that stores _type_ information.
+///
+/// TODO: This does not currently store byte offsets within the source file
+/// since the original XSLT-based compiler did not have that capability;
+/// this will provide that information in the future.
+#[derive(Debug, Default, PartialEq, Clone)]
+pub struct Source<'i> {
+ /// Name of package containing reference to this object.
+ pub pkg_name: Option<&'i Symbol<'i>>,
+
+ /// Relative path to the source of this object,
+ /// if not present in the current package.
+ pub src: Option<&'i Symbol<'i>>,
+
+ /// The identifier from which this one is derived.
+ ///
+ /// See [`IdentKind`] for more information on parents.
+ /// For example,
+ /// a [`IdentKind::Cgen`] always has a parent [`IdentKind::Class`].
+ pub parent: Option<&'i Symbol<'i>>,
+
+ /// Child identifier associated with this identifier.
+ ///
+ /// For [`IdentKind::Class`],
+ /// this represents an associated [`IdentKind::Cgen`].
+ pub yields: Option<&'i Symbol<'i>>,
+
+ /// User-friendly identifier description.
+ ///
+ /// This is used primarily by [`IdentKind::Class`] and
+ /// [`IdentKind::Gen`].
+ pub desc: Option<String>,
+
+ /// Whether this identifier was generated by the compiler.
+ ///
+ /// A generated identifier is representative of an internal
+ /// implementation detail that should remain encapsulated from the
+ /// user and is subject to change over time.
+ ///
+ /// Identifiers created by templates are not considered to be generated.
+ pub generated: bool,
+
+ /// Related identifiers.
+ ///
+ /// These data represent a kluge created to add additional symbol
+ /// information in two different contexts:
+ ///
+ /// - [`IdentKind::Map`] includes the name of the source field; and
+ /// - [`IdentKind::Func`] lists params in order (so that the compiler
+ /// knows application order).
+ ///
+ /// TODO: We have `parent`, `yields`, and `from`.
+ /// We should begin to consolodate.
+ pub from: Option<Vec<&'i Symbol<'i>>>,
+
+ /// Whether identifier is virtual (can be overridden).
+ ///
+ /// This feature adds complexity and will ideally be removed in the
+ /// future.
+ ///
+ /// See also [`override`][Source::override_].
+ pub virtual_: bool,
+
+ /// Whether identifier overrides a virtual identifier.
+ ///
+ /// This feature adds complexity and will ideally be removed in the
+ /// future.
+ ///
+ /// See also [`virtual_`][Source::virtual_].
+ pub override_: bool,
+}
+
+impl<'i> From<SymAttrs<'i>> for Source<'i> {
+ /// Raise Legacy IR [`SymAttrs`].
+ ///
+ /// This simply extracts a subset of fields from the source attributes.
+ fn from(attrs: SymAttrs<'i>) -> Self {
+ Source {
+ pkg_name: attrs.pkg_name,
+ src: attrs.src,
+ generated: attrs.generated,
+ parent: attrs.parent,
+ yields: attrs.yields,
+ desc: attrs.desc,
+ from: attrs.from,
+ virtual_: attrs.virtual_,
+ override_: attrs.override_,
+ }
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+ use crate::sym::SymbolIndex;
+
+ #[test]
+ fn source_from_sym_attrs() {
+ let nsym = Symbol::new_dummy(SymbolIndex::from_u32(1), "name");
+ let ssym = Symbol::new_dummy(SymbolIndex::from_u32(2), "src");
+ let psym = Symbol::new_dummy(SymbolIndex::from_u32(3), "parent");
+ let ysym = Symbol::new_dummy(SymbolIndex::from_u32(4), "yields");
+ let fsym = Symbol::new_dummy(SymbolIndex::from_u32(5), "from");
+
+ let attrs = SymAttrs {
+ pkg_name: Some(&nsym),
+ src: Some(&ssym),
+ generated: true,
+ parent: Some(&psym),
+ yields: Some(&ysym),
+ desc: Some("sym desc".to_string()),
+ from: Some(vec![&fsym]),
+ virtual_: true,
+ override_: true,
+ ..Default::default()
+ };
+
+ assert_eq!(
+ Source {
+ pkg_name: Some(&nsym),
+ src: Some(&ssym),
+ generated: attrs.generated,
+ parent: attrs.parent,
+ yields: attrs.yields,
+ desc: Some("sym desc".to_string()),
+ from: Some(vec![&fsym]),
+ virtual_: true,
+ override_: true,
+ },
+ attrs.into(),
+ );
+ }
+}
diff --git a/tamer/src/ir/legacyir.rs b/tamer/src/ir/legacyir.rs
new file mode 100644
index 0000000..bdf33a8
--- /dev/null
+++ b/tamer/src/ir/legacyir.rs
@@ -0,0 +1,347 @@
+// Legacy IR
+//
+// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+//! Legacy IR faithful to the XSLT-based compiler.
+//!
+//! This represents the intermediate format (IR) used by the `xmlo` files
+//! (see [`crate::obj::xmlo`]) originally produced by the XSLT-based
+//! compiler.
+//! It consists largely of metadata for object symbols.
+//!
+//! This IR should be converted into a higher-level IR quickly,
+//! especially considering that it will be going away in the future.
+
+use crate::sym::Symbol;
+use std::convert::TryFrom;
+use std::result::Result;
+
+/// Toplevel package attributes.
+#[derive(Debug, Default, PartialEq, Eq)]
+pub struct PackageAttrs<'i> {
+ /// Unique package identifier.
+ ///
+ /// The package name is derived from the filename relative to the
+ /// project root during compilation (see `relroot`).
+ pub name: Option<&'i Symbol<'i>>,
+
+ /// Relative path from package to project root.
+ pub relroot: Option<String>,
+
+ /// Whether this package is a program.
+ ///
+ /// A _program_ is a package intended to be linked into a final
+ /// executable.
+ /// Programs cannot be imported by other packages.
+ /// Non-program packages cannot be linked.
+ pub program: bool,
+
+ /// Symbol representing package eligibility.
+ ///
+ /// A package is _eligible_ for computation if certain invariants are
+ /// met.
+ /// This symbol is responsible for including each of those invariants as
+ /// dependencies so that they are included at link-time.
+ pub elig: Option<&'i Symbol<'i>>,
+}
+
+/// Symbol attributes.
+///
+/// This is a subset of all available attributes available on the
+/// `preproc:sym` nodes;
+/// more will be added as needed.
+///
+/// Not all symbols share the same set of attributes,
+/// so this represents the union of all possible attribute sets.
+///
+/// Due to the number of possible attributes,
+/// this is not an opaque type.
+/// Consequently,
+/// valid values should be enforced by the Rust's type system.
+#[derive(Debug, Default, PartialEq, Eq)]
+pub struct SymAttrs<'i> {
+ /// Relative path to the package that defined this symbol.
+ ///
+ /// Object files store relative paths so that they are somewhat
+ /// portable—the
+ /// entire project root should be able to be relocated.
+ pub src: Option<&'i Symbol<'i>>,
+
+ /// Symbol type.
+ ///
+ /// The type describes the purpose of the symbol and determines both how
+ /// it is compiled and its location in the final executable.
+ pub ty: Option<SymType>,
+
+ /// Number of dimensions.
+ ///
+ /// This determines the number of subscripts needed to access a scalar
+ /// value.
+ /// A value of `0` indicates a scalar;
+ /// a value of `1` indicates a vector;
+ /// a value of `2` indicates a matrix;
+ /// and a value of `n` indicates a multi-dimensional array of
+ /// depth `n`.
+ pub dim: Option<u8>,
+
+ /// Type of underlying data.
+ ///
+ /// This is not a primitive,
+ /// and mostly represents whether or not floating point computations
+ /// will take place.
+ pub dtype: Option<SymDtype>,
+
+ /// Whether the symbol's location will be determined at link-time.
+ ///
+ /// Externs allow symbols to be referenced without having yet been given
+ /// a concrete definition,
+ /// provided that an eventual concrete definition matches the
+ /// provided declaration.
+ /// The linker (see [`crate::ld`]) is responsible for ensuring that the
+ /// extern is satisfied and properly located in the final executable.
+ pub extern_: bool,
+
+ /// Unique package identifier.
+ ///
+ /// The name of a package is automatically derived from the package path
+ /// relative to the project root.
+ /// _Note that this is problematic if one wants to compile the equivalent
+ /// of shared libraries._
+ pub pkg_name: Option<&'i Symbol<'i>>,
+
+ /// The identifier from which this one is derived.
+ ///
+ /// For example,
+ /// [`SymType::Cgen`] has a parent [`SymType::Class`] and
+ /// [`SymType::Gen`] has a parent [`SymType::Rate`].
+ pub parent: Option<&'i Symbol<'i>>,
+
+ /// Whether this identifier was generated by the compiler.
+ ///
+ /// A generated identifier is representative of an internal
+ /// implementation detail that should remain encapsulated from the
+ /// user and is subject to change over time.
+ ///
+ /// Identifiers created by templates are not considered to be generated.
+ pub generated: bool,
+
+ /// Child identifier associated with this identifier.
+ ///
+ /// For [`SymType::Class`],
+ /// this represents an associated [`SymType::Cgen`].
+ pub yields: Option<&'i Symbol<'i>>,
+
+ /// User-friendly identifier description.
+ ///
+ /// This is used primarily by [`SymType::Class`] and [`SymType::Gen`].
+ pub desc: Option<String>,
+
+ /// Related identifiers.
+ ///
+ /// These data represent a kluge created to add additional symbol
+ /// information in two different contexts:
+ ///
+ /// - [`SymType::Map`] includes the name of the source field; and
+ /// - [`SymType::Func`] lists params in order (so that the compiler
+ /// knows application order).
+ pub from: Option<Vec<&'i Symbol<'i>>>,
+
+ /// Whether symbol can be overridden.
+ ///
+ /// See also [`override`][SymAttrs::override_].
+ pub virtual_: bool,
+
+ /// Whether symbol is an override of a virtual symbol.
+ ///
+ /// See also [`virtual`][SymAttrs::virtual_].
+ pub override_: bool,
+}
+
+/// Legacy symbol types.
+///
+/// This enum represents all symbol types represented in the `xmlo` files.
+/// They are overly specialized and will be deprecated in favor of more
+/// generalized dependent types in later IRs.
+#[derive(Debug, PartialEq, Eq)]
+pub enum SymType {
+ /// Classification generator (from `lv:classify/@yields`).
+ Cgen,
+ /// Classification (from `lv:classify/@as`).
+ Class,
+ /// Constant (from `lv:const/@name`).
+ Const,
+ /// Function (from `lv:function/@name`).
+ Func,
+ /// Generator (from `lv:rate/@generates`).
+ Gen,
+ /// Local function parameter (from `lv:function/lv:param/@name`) or let
+ /// binding (from `lv:let/lv:values/lv:value/@name`).
+ Lparam,
+ /// Global parameter (from `lv:param/@name`).
+ Param,
+ /// Scalar calculation result (from `lv:rate/@yields`).
+ Rate,
+ /// Template (from `lv:template/@name`).
+ Tpl,
+ /// Typedef (from `lv:type/@name`).
+ Type,
+ /// Input map head (meta symbol generated by compiler for each input map).
+ MapHead,
+ /// Input field→param mapping (from `lvm:map`, `lvm:pass`).
+ Map,
+ /// Input map tail (meta symbol generated by compiler for each input map).
+ MapTail,
+ /// Return map head (meta symbol generated by compiler for each return map).
+ RetMapHead,
+ /// Return param→field mapping (from `lvm:map`, `lvm:pass`).
+ RetMap,
+ /// Return map tail (meta symbol generated by compiler for each return map).
+ RetMapTail,
+ /// Arbitrary metadata (from `lv:meta`).
+ Meta,
+ /// Rating worksheet (generated by compiler for worksheet packages).
+ Worksheet,
+}
+
+impl TryFrom<&[u8]> for SymType {
+ type Error = String;
+
+ /// Determine symbol type from source `preproc:sym/@type`.
+ ///
+ /// This raises source `xmlo` data into this IR.
+ /// See [`crate::obj::xmlo::reader`].
+ fn try_from(value: &[u8]) -> Result<SymType, Self::Error> {
+ match value {
+ b"cgen" => Ok(SymType::Cgen),
+ b"class" => Ok(SymType::Class),
+ b"const" => Ok(SymType::Const),
+ b"func" => Ok(SymType::Func),
+ b"gen" => Ok(SymType::Gen),
+ b"lparam" => Ok(SymType::Lparam),
+ b"param" => Ok(SymType::Param),
+ b"rate" => Ok(SymType::Rate),
+ b"tpl" => Ok(SymType::Tpl),
+ b"type" => Ok(SymType::Type),
+ b"retmap:head" => Ok(SymType::RetMapHead),
+ b"retmap" => Ok(SymType::RetMap),
+ b"retmap:tail" => Ok(SymType::RetMapTail),
+ b"map:head" => Ok(SymType::MapHead),
+ b"map" => Ok(SymType::Map),
+ b"map:tail" => Ok(SymType::MapTail),
+ b"meta" => Ok(SymType::Meta),
+ b"worksheet" => Ok(SymType::Worksheet),
+ _ => Err(format!(
+ "unknown symbol type `{}`",
+ String::from_utf8(value.to_vec())
+ .unwrap_or("(invalid UTF8)".into())
+ )),
+ }
+ }
+}
+
+/// Underlying datatype.
+///
+/// This is the type of scalar data stored within the given symbol.
+///
+/// *NB:* This was _not enforced_ by the XSLT-based compiler.
+#[derive(Debug, PartialEq, Eq, Clone, Copy)]
+pub enum SymDtype {
+ /// {⊥,⊤} = {0,1} ⊂ ℤ
+ Boolean,
+ /// ℤ
+ Integer,
+ /// ℝ
+ Float,
+ /// ∅
+ Empty,
+}
+
+impl AsRef<str> for SymDtype {
+ /// Produce `xmlo`-compatible representation.
+ fn as_ref(&self) -> &str {
+ match self {
+ SymDtype::Boolean => &"boolean",
+ SymDtype::Integer => &"integer",
+ SymDtype::Float => &"float",
+ SymDtype::Empty => &"empty",
+ }
+ }
+}
+
+impl TryFrom<&[u8]> for SymDtype {
+ type Error = String;
+
+ /// Determine data type from source `preproc:sym/@dtype`.
+ ///
+ /// This raises source `xmlo` data into this IR.
+ /// See [`crate::obj::xmlo::reader`].
+ fn try_from(value: &[u8]) -> Result<SymDtype, Self::Error> {
+ match value {
+ b"boolean" => Ok(SymDtype::Boolean),
+ b"integer" => Ok(SymDtype::Integer),
+ b"float" => Ok(SymDtype::Float),
+ b"empty" => Ok(SymDtype::Empty),
+ _ => Err(format!(
+ "unknown symbol dtype `{}`",
+ String::from_utf8(value.to_vec())
+ .unwrap_or("(invalid UTF8)".into())
+ )),
+ }
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+
+ // We're not going to check every possible value here since we'd be
+ // maintaining the mapping in two places; we can leave that to
+ // integration tests.
+ #[test]
+ fn symtype_from_u8() {
+ assert_eq!(Ok(SymType::Cgen), SymType::try_from(b"cgen" as &[u8]));
+ }
+
+ #[test]
+ fn symtype_failure_from_unknown_u8() {
+ match SymType::try_from(b"unknown" as &[u8]) {
+ Err(s) => assert!(s.contains("unknown")),
+ bad => panic!("expected error: {:?}", bad),
+ }
+ }
+
+ #[test]
+ fn symdtype_from_u8() {
+ assert_eq!(
+ Ok(SymDtype::Integer),
+ SymDtype::try_from(b"integer" as &[u8])
+ );
+ }
+
+ #[test]
+ fn symdtype_failure_from_unknown_u8() {
+ match SymDtype::try_from(b"unknownd" as &[u8]) {
+ Err(s) => assert!(s.contains("unknownd")),
+ bad => panic!("expected error: {:?}", bad),
+ }
+ }
+
+ #[test]
+ fn symdtype_as_str() {
+ let boolean: &str = SymDtype::Boolean.as_ref();
+ assert_eq!("boolean", boolean);
+ }
+}
diff --git a/tamer/src/ir/mod.rs b/tamer/src/ir/mod.rs
new file mode 100644
index 0000000..1a74db1
--- /dev/null
+++ b/tamer/src/ir/mod.rs
@@ -0,0 +1,62 @@
+// Intermediate representations (IRs)
+//
+// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+//! Intermediate representations for TAME programs.
+//!
+//! [Intermediate representations][ir] (IRs) are data structures used to
+//! represent source data in a manner most suitable for a particular phase
+//! of compilation.
+//! A single IR may be used by multiple compilation phases,
+//! or by multiple systems (e.g. various compilers or [linkers][]).
+//!
+//! [ir]: https://en.wikipedia.org/wiki/Intermediate_representation
+//! [linkers]: crate::ld
+//!
+//!
+//! Implicit AST
+//! ============
+//! Each input language begins as an [abstract syntax tree][ast] (AST),
+//! produced by the parser.
+//! For TAME languages that are XML-based,
+//! the production of the AST is handled by [`quick_xml`],
+//! and is effectively the same as the source XML.
+//! There is no explicit data structure to represent the AST of XML
+//! sources.
+//!
+//! [ast]: https://en.wikipedia.org/wiki/Abstract_syntax_tree
+//!
+//!
+//! Summary of IRs
+//! ==============
+//! There are currently two IRs:
+//!
+//! 1. **[Legacy IR](legacyir)** corresponds very closely to the structure
+//! of [`xmlo` object files](super::obj::xmlo).
+//! It contains a lot of cruft and will be replaced in the future with
+//! a more suitable IR.
+//! This stores very limited context for the information it provides,
+//! so it must quickly translate it to a higher-level IR for further
+//! processing before context is lost.
+//! 2. The **[Abstract Semantic Graph (ASG)](asg)** is created from
+//! lower-level IRs.
+//! It stores relationships between identifiers and expressions within
+//! a graph data structure,
+//! and is capable of representing entire programs composed of many
+//! different packages.
+
+pub mod asg;
+pub mod legacyir;
diff --git a/tamer/src/ld.rs b/tamer/src/ld.rs
index 3c6a875..66908b3 100644
--- a/tamer/src/ld.rs
+++ b/tamer/src/ld.rs
@@ -15,4 +15,96 @@
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//! Combine [object files](crate::obj) into a final executable.
+//!
+//! It's user-facing binary is [`tameld`](../../tameld).
+//!
+//!
+//! Background Information
+//! ======================
+//! A [linker][] is responsible for combining individually compiled
+//! [object files](crate::obj) containing relocatable code into a final
+//! executable.
+//! This involves putting the compiled code fragments into the right order
+//! and into the right place within the executable.
+//!
+//! [linker]: https://en.wikipedia.org/wiki/Linker_(computing)
+//!
+//! _See below for more information on why this linker currently produces
+//! another intermediate format (`xmle`) rather than a final executable._
+//!
+//! The type of relocatable code depends on the _target_.
+//! Currently, the only target is JavaScript.
+//!
+//!
+//! Backwards-Compatibility With XSLT System
+//! -------------------------------------
+//! This linker is part of the TAMER (TAME in Rust) project,
+//! which aims to incrementally rewrite TAME in Rust.
+//! Consequently, it must be able to serve as a drop-in replacement for the
+//! existing (XSLT) linker,
+//! which takes as input `xmlo` files and produces as output an `xmle`
+//! file.
+//! *This is not efficient*,
+//! and future versions will begin to migrate away from this strategy.
+//!
+//! The output `xmle` file can then be fed to a `standalone` command which
+//! extracts the JavaScript fragment and places it into its own file.
+//! Even when that is replaced
+//! (when this just outputs a final JS file directly),
+//! the `xmle` file is still needed for other purposes,
+//! such as `summary` and `dote` generation.
+//! Those too will eventually be linker targets.
+//!
+//!
+//! Linking Process
+//! ===============
+//! The linker works in the following steps:
+//!
+//! 1. [Object files](crate::obj) are recursively read.
+//! They are used in a streaming manner for the next step of the
+//! process;
+//! they do not persist in memory.
+//! Only the required portions of the file are loaded.
+//! See the [Legacy IR](crate::ir::legacyir) for more information.
+//!
+//! 2. This information is used to populate the [ASG].
+//! Information is added to the graph as it is discovered during object
+//! file loading,
+//! so the graph initially contains edges to missing identifiers.
+//! Expressions are _not_ added to the graph,
+//! as they are not needed for linking.
+//! Once all data are loaded,
+//! the ASG contains relocatable code fragments for each identifier.
+//!
+//! 3. The ASG is [sorted topologically][topo-sort] so that dependencies
+//! will be written to the executable file before identifiers that
+//! depend on them.
+//! Roots for the sort are specified by the return map.
+//! _Identifiers that are not accessable from one of those roots will be
+//! omitted from the executable output._
+//!
+//! 4. Relocatable code fragments are output into various sections in the
+//! executable file.
+//! This output file is currently `xmle`.
+//! (**TODO**: Link to new `xmle` crate.)
+//!
+//! [ASG]: crate::ir::asg
+//! [topo-sort]: https://en.wikipedia.org/wiki/Topological_sorting
+//!
+//! Steps 1 and 2 are performed at the same time:
+//! object files are used to immediately populate the [ASG][].
+//! Since the ASG contains only partial information,
+//! it must perform other validations (such as extern resolution) during
+//! this process;
+//! see [crate::ir::asg] for more information.
+//!
+//! Because the topological sort only considered explicitly defined roots,
+//! identifiers are only included in the final executable if they are
+//! either a root or are a dependency of a root.
+//! This makes it possible to create large reusable packages without
+//! incurring a runtime cost for unused objects,
+//! which is especially important since templates may expand into many
+//! identifiers.
+
pub mod poc;
diff --git a/tamer/src/ld/poc.rs b/tamer/src/ld/poc.rs
index d2b3fcd..53f9f7b 100644
--- a/tamer/src/ld/poc.rs
+++ b/tamer/src/ld/poc.rs
@@ -18,216 +18,44 @@
//! **This is a poorly-written proof of concept; do not use!** It has been
//! banished to its own file to try to make that more clear.
-use fixedbitset::FixedBitSet;
-use petgraph::graph::{DiGraph, EdgeIndex, Neighbors, NodeIndex};
-use petgraph::visit::{DfsPostOrder, GraphBase, IntoNeighbors, Visitable};
-use quick_xml::events::Event;
-use quick_xml::Reader;
-use std::collections::hash_map::{Entry, Iter};
-use std::collections::{HashMap, HashSet};
+use crate::global;
+use crate::ir::asg::IdentKind;
+use crate::ir::asg::{Asg, DefaultAsg, Object, ObjectRef, Source};
+use crate::obj::xmle::writer::{Sections, XmleWriter};
+use crate::obj::xmlo::reader::{XmloError, XmloEvent, XmloReader};
+use crate::sym::{DefaultInterner, Interner, Symbol};
+use fxhash::{FxHashMap, FxHashSet};
+use petgraph::visit::DfsPostOrder;
+use std::convert::TryInto;
use std::error::Error;
use std::fs;
-use std::io::BufRead;
-use std::ops::{Deref, Index};
-use std::rc::Rc;
-
-// The term "sym" is used throughout because it's easier to search for that
-// in source code than "symbol", which is a generic term with many different
-// meanings.
-
-// if mutability is needed:
-//#[derive(Debug)]
-//struct SymRecord {
-// data: SymData,
-//
-// // the idea is to keep the index encapsulated so that nothing else can
-// // ever hold a reference to it, ensuring that it's freed when the node
-// // is removed
-// index: Rc<RefCell<Option<NodeIndex>>>,
-//}
-
-#[derive(Debug)]
-struct SymData {
- name: Rc<str>,
-}
-
-type DepGraphNode = SymEntry;
-type DepGraphEdge = ();
-
-struct DepGraph {
- graph: DiGraph<DepGraphNode, DepGraphEdge>,
-
- // serves as both a string internment system and graph indexer
- index: HashMap<Rc<str>, SymRef>,
- // if removals are permitted:
- //index: HashMap<Rc<str>, Weak<RefCell<Option<NodeIndex>>>>,
-}
-
-// This encapsulates the underlying Graph to enforce certain
-// assumptions. For example, we do not permit removing nodes because that
-// would invalidate the NodeIndex reference in the index, which would then
-// require workarounds like the commented-out code above and below.
-//
-// While Petgraph's use of indexes to represent graph and edge references
-// makes it easy to bypass the borrow checker, it does just that---it's no
-// different than a pointer reference (albeit guaranteed to safely reference
-// a node rather than an arbitrary memory location) that can change out from
-// under you at any moment. As such, much of the planning that went into
-// this was determining how to best mitigate that.
-//
-// The linker has certain needs that may differ as the compiler evolves, so
-// it may be desirable to permit deletions in the future. In the meantime,
-// if a node needs to be deleted, we can simply remove all edges from it and
-// possibly mark it in a way that states it was removed.
-//
-// This graph uses a separate map to serve a dual role: a string internment
-// system and an indexer by symbol name. This will have to evolve in the
-// future as the graph ends up containing more stuff.
-//
-// This is currently called a dependency graph, since that's what we're
-// using it for, but in the future the compiler will also use it as an IR,
-// so this will likely be renamed.
-impl DepGraph {
- fn new() -> Self {
- Self {
- // TODO: with_capacity
- graph: DiGraph::new(),
- index: HashMap::new(),
- }
- }
-
- fn declare(&mut self, name: &str) -> SymRef {
- match self.index.entry(name.into()) {
- Entry::Occupied(o) => *o.get(),
- Entry::Vacant(v) => {
- let entry = SymEntry::MissingSym {
- name: Rc::clone(v.key()),
- };
-
- let index = SymRef(self.graph.add_node(entry));
- v.insert(index);
+use std::io::BufReader;
+use std::io::Cursor;
- index
- }
- }
- }
-
- // will not duplicate dependencies if they already exist
- fn declare_dep(&mut self, symbol: SymRef, dep: SymRef) -> () {
- self.graph.update_edge(*symbol, *dep, ());
- }
-
- fn lookup(&self, name: &str) -> Option<SymRef> {
- self.index.get(name.into()).map(|index| *index)
- }
-
- fn index_iter(&self) -> Iter<Rc<str>, SymRef> {
- self.index.iter()
- }
-
- // POC when removals were permitted:
- //fn add_symbol(&mut self, sym: SymData) -> NodeIndex {
- // let name = Rc::clone(&sym.name);
- // let record = SymRecord { data: sym, index: Rc::new(RefCell::new(None)) };
- // let index = self.graph.add_node(record);
-
- // let index = Rc::downgrade(&self.graph[index].index);
- // self.graph[index].index.replace(Some(index));
-
- // self.index.insert(name, index);
- // index
- //}
-}
-
-impl GraphBase for DepGraph {
- type NodeId = NodeIndex;
- type EdgeId = EdgeIndex;
-}
-
-impl Visitable for DepGraph {
- type Map = FixedBitSet;
-
- fn visit_map(&self) -> Self::Map {
- self.graph.visit_map()
- }
-
- fn reset_map(&self, map: &mut Self::Map) {
- self.graph.reset_map(map)
- }
-}
-
-impl<'a> IntoNeighbors for &'a DepGraph {
- type Neighbors = Neighbors<'a, DepGraphEdge>;
-
- fn neighbors(self, n: Self::NodeId) -> Self::Neighbors {
- self.graph.neighbors(n)
- }
-}
-
-impl Index<SymRef> for DepGraph {
- type Output = DepGraphNode;
-
- fn index(&self, index: SymRef) -> &Self::Output {
- &self.graph[*index]
- }
-}
-
-// TODO: we may not to allow this; using SymRef could be a means to
-// guarantee that a lookup has occurred and that it actually exists. We
-// don't need this if we set NodeId = SymRef in GraphBase, but that requires
-// implementing other traits as well.
-impl Index<NodeIndex> for DepGraph {
- type Output = DepGraphNode;
-
- fn index(&self, index: NodeIndex) -> &Self::Output {
- &self.graph[index]
- }
-}
-
-#[derive(Debug, Clone, Copy, PartialEq)]
-struct SymRef(NodeIndex);
-
-impl From<SymRef> for NodeIndex {
- fn from(symref: SymRef) -> Self {
- *symref
- }
-}
-
-impl From<NodeIndex> for SymRef {
- fn from(index: NodeIndex) -> Self {
- Self(index)
- }
-}
-
-impl Deref for SymRef {
- type Target = NodeIndex;
-
- fn deref(&self) -> &Self::Target {
- &self.0
- }
-}
-
-#[derive(Debug, PartialEq)]
-enum SymEntry {
- MissingSym { name: Rc<str> },
-}
+type LinkerAsg<'i> = DefaultAsg<'i, global::ProgIdentSize>;
+type LinkerObjectRef = ObjectRef<global::ProgIdentSize>;
pub fn main() -> Result<(), Box<dyn Error>> {
- let mut pkgs_seen = HashSet::<String>::new();
- let mut fragments = HashMap::<String, String>::new();
- let mut depgraph = DepGraph::new();
+ let mut pkgs_seen: FxHashSet<String> = Default::default();
+ let mut fragments: FxHashMap<&str, String> = Default::default();
+ let mut depgraph = LinkerAsg::with_capacity(65536, 65536);
+ let mut roots = Vec::new();
+ let interner = DefaultInterner::new();
let package_path = std::env::args().nth(1).expect("Missing argument");
let abs_path = fs::canonicalize(package_path).unwrap();
println!("WARNING: This is proof-of-concept; do not use!");
- load_xmlo(
+ let (name, relroot) = load_xmlo(
&abs_path.to_str().unwrap().to_string(),
&mut pkgs_seen,
&mut fragments,
&mut depgraph,
- )?;
+ &interner,
+ &mut roots,
+ )?
+ .expect("missing root package information");
// println!(
// "Graph {:?}",
@@ -239,92 +67,152 @@ pub fn main() -> Result<(), Box<dyn Error>> {
// .collect::<Vec<_>>()
// );
- let sorted = sort_deps(&depgraph);
+ roots.extend(
+ vec!["___yield", "___worksheet"]
+ .iter()
+ .map(|name| interner.intern(name))
+ .filter_map(|sym| depgraph.lookup(sym)),
+ );
+
+ let mut sorted = sort_deps(&depgraph, &roots);
+
+ //println!("Sorted ({}): {:?}", sorted.len(), sorted);
- println!("Sorted ({}): {:?}", sorted.len(), sorted);
+ output_xmle(
+ &depgraph,
+ &interner,
+ &mut sorted,
+ name.expect("missing root package name"),
+ relroot.expect("missing root package relroot"),
+ )?;
Ok(())
}
-fn load_xmlo<'a>(
+fn load_xmlo<'a, 'i, I: Interner<'i>>(
path_str: &'a str,
- pkgs_seen: &mut HashSet<String>,
- fragments: &mut HashMap<String, String>,
- depgraph: &mut DepGraph,
-) -> Result<(), Box<dyn Error>> {
+ pkgs_seen: &mut FxHashSet<String>,
+ fragments: &mut FxHashMap<&'i str, String>,
+ depgraph: &mut LinkerAsg<'i>,
+ interner: &'i I,
+ roots: &mut Vec<LinkerObjectRef>,
+) -> Result<Option<(Option<&'i Symbol<'i>>, Option<String>)>, Box<dyn Error>> {
let path = fs::canonicalize(path_str)?;
let path_str = path.to_str().unwrap();
+ let first = pkgs_seen.len() == 0;
+
if !pkgs_seen.insert(path_str.to_string()) {
- return Ok(());
+ return Ok(None);
}
- println!("processing {}", path_str);
-
- let mut found = HashSet::<String>::new();
-
- match Reader::from_file(&path) {
- Ok(mut reader) => loop {
- let mut buf = Vec::new();
-
- // we know that the XML produced by Saxon is valid
- reader.check_end_names(false);
-
- match reader.read_event(&mut buf) {
- Ok(Event::Start(ele)) | Ok(Event::Empty(ele)) => {
- let mut attrs = ele.attributes();
- let mut filtered =
- attrs.with_checks(false).filter_map(Result::ok);
-
- match ele.name() {
- b"preproc:sym-dep" => filtered
- .find(|attr| attr.key == b"name")
- .map(|attr| attr.value)
- .and_then(|mut name| {
- read_deps(&mut reader, depgraph, name.to_mut())
- })
- .ok_or("Missing name"),
-
- b"preproc:sym" => {
- filtered
- .find(|attr| attr.key == b"src")
- .map(|attr| attr.value.to_owned())
- .and_then(|src| {
- let path_str =
- std::str::from_utf8(&src).unwrap();
-
- found.insert(path_str.to_string());
- Some(())
- });
- Ok(())
- }
+ //println!("processing {}", path_str);
+
+ let mut found: FxHashSet<&str> = Default::default();
+
+ let file = fs::File::open(&path)?;
+ let reader = BufReader::new(file);
+ let mut xmlo = XmloReader::new(reader, interner);
+ let mut elig = None;
+
+ let mut name: Option<&'i Symbol<'i>> = None;
+ let mut relroot: Option<String> = None;
+
+ loop {
+ match xmlo.read_event() {
+ Ok(XmloEvent::Package(attrs)) => {
+ if first {
+ name = attrs.name;
+ relroot = attrs.relroot;
+ }
+ elig = attrs.elig;
+ }
- b"preproc:fragment" => filtered
- .find(|attr| attr.key == b"id")
- .map(|attr| String::from_utf8(attr.value.to_vec()))
- .and_then(|id| {
- let fragment = reader
- .read_text(ele.name(), &mut Vec::new())
- .unwrap_or("".to_string());
-
- fragments.insert(id.unwrap(), fragment);
- Some(())
- })
- .ok_or("Missing fragment id"),
- _ => Ok(()),
+ Ok(XmloEvent::SymDeps(sym, deps)) => {
+ // TODO: API needs to expose whether a symbol is already
+ // known so that we can warn on them
+
+ // Maps should not pull in symbols since we may end up
+ // mapping to params that are never actually used
+ if !sym.starts_with(":map:") {
+ for dep_sym in deps {
+ depgraph.add_dep_lookup(sym, dep_sym);
}
}
- Ok(Event::Eof) => break (),
- Err(e) => {
- panic!("Error at {}: {:?}", reader.buffer_position(), e);
+ }
+
+ Ok(XmloEvent::SymDecl(sym, attrs)) => {
+ if let Some(sym_src) = attrs.src {
+ found.insert(sym_src);
+ } else if attrs.extern_ {
+ // TODO: externs (they're implicitly handled, without
+ // checks, by Missing)
+ // depgraph.declare_extern(sym, kind);
+ } else {
+ let owned = attrs.src.is_none();
+
+ let kind = (&attrs).try_into().map_err(|err| {
+ format!("sym `{}` attrs error: {}", sym, err)
+ });
+
+ let mut src: Source = attrs.into();
+
+ // Existing convention is to omit @src of local package
+ // (in this case, the program being linked)
+ if first {
+ src.pkg_name = None;
+ }
+
+ // TODO: should probably track these down in the XSLT linker...
+ match kind {
+ Ok(kindval) => {
+ // TODO: inefficient
+ let link_root = owned
+ && (kindval == IdentKind::Meta
+ || kindval == IdentKind::Map
+ || kindval == IdentKind::RetMap);
+
+ let node = depgraph.declare(sym, kindval, src)?;
+
+ if link_root {
+ roots.push(node);
+ }
+ }
+ Err(e) => println!("{:?}; skipping...", e),
+ };
}
- _ => Ok(()),
}
- .unwrap_or_else(|r| panic!("Parse error: {:?}", r));
- buf.clear();
- },
- Err(e) => panic!("Error {:?}", e),
+ Ok(XmloEvent::Fragment(sym, text)) => {
+ let result = depgraph.set_fragment(
+ depgraph.lookup(sym).unwrap_or_else(|| {
+ panic!("missing symbol for fragment: {}", sym)
+ }),
+ text,
+ );
+
+ match result {
+ Ok(_) => (),
+ Err(e) => println!("{:?}; skipping...", e),
+ };
+ }
+
+ // We don't need to read any further than the end of the
+ // header (symtable, sym-deps, fragments)
+ Ok(XmloEvent::Eoh) => break,
+
+ Err(err @ XmloError::UnassociatedFragment) => {
+ println!("{:?}; skipping...", err);
+ }
+
+ err @ Err(_) => err.map(|_| ())?,
+ }
+ }
+
+ if let Some(elig_sym) = elig {
+ roots.push(depgraph.lookup(elig_sym).expect(
+ "internal error: package elig references nonexistant symbol",
+ ));
}
let mut dir = path.clone();
@@ -335,118 +223,131 @@ fn load_xmlo<'a>(
path_buf.push(relpath);
path_buf.set_extension("xmlo");
- //println!("Trying {:?}", path_buf);
+ // println!("Trying {:?}", path_buf);
let path_abs = path_buf.canonicalize().unwrap();
let path = path_abs.to_str().unwrap();
- load_xmlo(path, pkgs_seen, fragments, depgraph)?;
+ load_xmlo(path, pkgs_seen, fragments, depgraph, interner, roots)?;
}
- Ok(())
-}
-
-fn read_deps<B>(
- reader: &mut Reader<B>,
- depgraph: &mut DepGraph,
- name: &[u8],
-) -> Option<()>
-where
- B: BufRead,
-{
- // TODO: API needs to expose whether a symbol is already known so that
- // we can warn on them
- // note: using from_utf8_unchecked here did _not_ improve performance
- let sym_node = depgraph.declare(std::str::from_utf8(name).unwrap());
-
- //println!("processing deps for {}", sym_name);
-
- loop {
- match reader.read_event(&mut Vec::new()) {
- Ok(Event::Start(ele)) | Ok(Event::Empty(ele)) => {
- let mut attrs = ele.attributes();
- let mut filtered =
- attrs.with_checks(false).filter_map(Result::ok);
-
- filtered.find(|attr| attr.key == b"name").and_then(
- |mut attr| {
- let name = attr.value.to_mut();
- let str = std::str::from_utf8(name).unwrap();
-
- let dep_node = depgraph.declare(&str);
- depgraph.declare_dep(sym_node, dep_node);
-
- Some(())
- },
- );
-
- //println!("{:?}", ele.attributes().collect::<Vec<_>>());
- }
-
- Ok(Event::Eof) | Ok(Event::End(_)) => break Some(()),
-
- Err(e) => {
- panic!("Error at {}: {:?}", reader.buffer_position(), e);
- }
-
- _ => (),
- }
+ if first {
+ Ok(Some((name, relroot)))
+ } else {
+ Ok(None)
}
}
-fn sort_deps(depgraph: &DepGraph) -> Vec<&SymEntry> {
+fn sort_deps<'a, 'i>(
+ depgraph: &'a LinkerAsg<'i>,
+ roots: &Vec<LinkerObjectRef>,
+) -> Sections<'a, 'i> {
// @type=meta, @preproc:elig-class-yields
// @type={ret}map{,:head,:tail}
- let roots = discover_roots(depgraph);
+ let mut deps: Sections = Sections::new();
// This is technically a topological sort, but functions have
// cycles. Once we have more symbol metadata, we can filter them out
// and actually invoke toposort.
let mut dfs = DfsPostOrder::empty(&depgraph);
- let mut sorted = Vec::new();
+
+ //println!("discovered roots: {:?}", roots);
// TODO: we'll be processing various roots separately
for index in roots {
- dfs.stack.push(*index);
+ dfs.stack.push((*index).into());
}
+ // TODO: can we encapsulate NodeIndex?
while let Some(index) = dfs.next(&depgraph) {
- sorted.push(&depgraph[index]);
+ let ident = depgraph.get(index).unwrap();
+
+ match ident {
+ Object::Ident(_, kind, _)
+ | Object::IdentFragment(_, kind, _, _) => match kind {
+ IdentKind::Meta => deps.meta.push_body(ident),
+ IdentKind::Worksheet => deps.worksheet.push_body(ident),
+ IdentKind::Param(_, _) => deps.params.push_body(ident),
+ IdentKind::Type(_) => deps.types.push_body(ident),
+ IdentKind::Func(_, _) => deps.funcs.push_body(ident),
+ IdentKind::MapHead | IdentKind::Map | IdentKind::MapTail => {
+ deps.map.push_body(ident)
+ }
+ IdentKind::RetMapHead
+ | IdentKind::RetMap
+ | IdentKind::RetMapTail => deps.retmap.push_body(ident),
+ _ => deps.rater.push_body(ident),
+ },
+ _ => panic!("unexpected node: {:?}", ident),
+ }
}
- sorted
+ deps
+}
+
+fn get_interner_value<'a, 'i, I: Interner<'i>>(
+ depgraph: &'a LinkerAsg<'i>,
+ interner: &'i I,
+ name: &str,
+) -> &'a Object<'i> {
+ depgraph
+ .get(
+ depgraph
+ .lookup(interner.intern(name))
+ .unwrap_or_else(|| panic!("missing identifier: {}", name)),
+ )
+ .expect("Could not get interner value")
}
-fn discover_roots(depgraph: &DepGraph) -> Vec<SymRef> {
- // TODO: filter_map
- let mut map_syms = depgraph
- .index_iter()
- .filter(|(key, _)| {
- key.starts_with(":map:") || key.starts_with(":retmap:")
- })
- .map(|(_, value)| *value)
- .collect::<Vec<_>>();
-
- let mut roots = vec!["___yield", "___worksheet"]
- .iter()
- .filter_map(|sym| depgraph.lookup(sym))
- .collect::<Vec<_>>();
-
- roots.append(&mut map_syms);
-
- //println!(
- // "found roots: {:?}",
- // roots
- // .iter()
- // .map(|index| &depgraph.graph[*index])
- // .collect::<Vec<_>>()
- //);
-
- roots
+fn output_xmle<'a, 'i, I: Interner<'i>>(
+ depgraph: &'a LinkerAsg<'i>,
+ interner: &'i I,
+ sorted: &mut Sections<'a, 'i>,
+ name: &'i Symbol<'i>,
+ relroot: String,
+) -> Result<(), Box<dyn Error>> {
+ if !sorted.map.is_empty() {
+ sorted.map.push_head(get_interner_value(
+ depgraph,
+ interner,
+ &String::from(":map:___head"),
+ ));
+ sorted.map.push_tail(get_interner_value(
+ depgraph,
+ interner,
+ &String::from(":map:___tail"),
+ ));
+ }
+
+ if !sorted.retmap.is_empty() {
+ sorted.retmap.push_head(get_interner_value(
+ depgraph,
+ interner,
+ &String::from(":retmap:___head"),
+ ));
+ sorted.retmap.push_tail(get_interner_value(
+ depgraph,
+ interner,
+ &String::from(":retmap:___tail"),
+ ));
+ }
+
+ let writer = Cursor::new(Vec::new());
+ let mut xmle_writer = XmleWriter::new(writer);
+ xmle_writer
+ .write(&sorted, name, &relroot)
+ .expect("Could not write xmle output");
+
+ print!(
+ "{}",
+ String::from_utf8(xmle_writer.into_inner().into_inner())?
+ );
+
+ Ok(())
}
#[cfg(test)]
-mod tests {
+mod test {
#[test]
fn placeholder() {}
}
diff --git a/tamer/src/lib.rs b/tamer/src/lib.rs
index bf0f698..f69be3a 100644
--- a/tamer/src/lib.rs
+++ b/tamer/src/lib.rs
@@ -15,4 +15,13 @@
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//! An incremental rewrite of TAME in Rust.
+
+pub mod global;
+pub mod ir;
pub mod ld;
+pub mod obj;
+pub mod sym;
+
+#[cfg(test)]
+pub mod test;
diff --git a/tamer/src/obj/mod.rs b/tamer/src/obj/mod.rs
new file mode 100644
index 0000000..eb45061
--- /dev/null
+++ b/tamer/src/obj/mod.rs
@@ -0,0 +1,35 @@
+// Object files
+//
+// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+//! Object file construction and processing.
+//!
+//! An _[object file][]_ contains relocatable compiled code, symbol tables,
+//! and other information produced by the compiler.
+//! It is the responsibility of the [linker](super::ld) to construct a final
+//! executable from these files.
+//!
+//! [object file]: https://en.wikipedia.org/wiki/Object_file
+//!
+//! The only object file currently supported by TAMER is the [`xmlo`]
+//! format,
+//! produced by the XSLT compiler.
+//! It will likely be replaced with [ELF] object files in the future.
+//!
+//! [ELF]: https://en.wikipedia.org/wiki/Executable_and_Linkable_Format
+
+pub mod xmle;
+pub mod xmlo;
diff --git a/tamer/src/obj/xmle/mod.rs b/tamer/src/obj/xmle/mod.rs
new file mode 100644
index 0000000..1b1c360
--- /dev/null
+++ b/tamer/src/obj/xmle/mod.rs
@@ -0,0 +1,69 @@
+// xmle object files
+//
+// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+//! `xmle` file construction and processing.
+//!
+//! This file format exists for compatibility with the old compiler
+//! written in XSLT; it will be removed in the future.
+//!
+//!
+//! `xmle` Files
+//! ===================
+//! An `xmle` file is produced by the for each source file.
+//! The format is XML because the original compiler was written in XSLT.
+//!
+//! The general structure of an `xmle` file consists of different sections:
+//! - map
+//! - return map
+//! - statics
+//! - rater
+//!
+//! For example (with some extra information omitted):
+//!
+//! ```xml
+//! <package xmlns="http://www.lovullo.com/rater"
+//! xmlns:preproc="http://www.lovullo.com/rater/preproc"
+//! xmlns:l="http://www.lovullo.com/rater/linker"
+//! title="suppliers/tax"
+//! program="true"
+//! name="suppliers/tax"
+//! __rootpath="../">
+//! <l:dep>
+//! <preproc:sym type="func"
+//! dim="0"
+//! dtype="float"
+//! name="min"
+//! src="../rater/core/numeric/minmax"
+//! desc="Return the lesser value"/>
+//! </l:dep>
+//! <l:map-from>
+//! <l:from name="latest_operation_hour"/>
+//! </l:map-from>
+//! <l:map-exec>
+//! function( input, callback ) {)
+//! </l:map-exec>
+//! <l:retmap-exec>
+//! function( input, callback ) {)
+//! </l:retmap-exec>
+//! <l:static>
+//! function func_min( args , min1, min2) {return min1;}
+//! </l:static>
+//! <l:exec>consts[&apos;CMP_OP_EQ&apos;] = 1;</l:exec>
+//! </package>
+//! ```
+
+pub mod writer;
diff --git a/tamer/src/obj/xmle/writer/mod.rs b/tamer/src/obj/xmle/writer/mod.rs
new file mode 100644
index 0000000..4287699
--- /dev/null
+++ b/tamer/src/obj/xmle/writer/mod.rs
@@ -0,0 +1,47 @@
+// Object file writer
+//
+// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+//! xmle file writer.
+//!
+//! This defines a lower-level event-based `XmleWriter` similar to that of
+//! `quick_xml`, where the events are a slightly higher-level abstraction
+//! over the types of nodes present in the file.
+//!
+//! For more information on xmle files, see the [parent crate][`super`].
+//!
+//! The example below is incomplete, but shows the general usage.
+//!
+//! ```
+//! use tamer::obj::xmle::writer::{Sections, XmleWriter};
+//! use tamer::sym::{DefaultInterner, Interner, Symbol};
+//! use std::io::Cursor;
+//!
+//! let interner = DefaultInterner::new();
+//! let name = interner.intern(&String::from("foo"));
+//!
+//! let sections = Sections::new();
+//! let writer = Cursor::new(Vec::new());
+//! let mut xmle_writer = XmleWriter::new(writer);
+//! xmle_writer.write(&sections, name, &String::from(""));
+//! ```
+
+mod writer;
+mod xmle;
+
+pub use writer::{Result, Section, Sections, Writer, WriterError};
+
+pub use xmle::XmleWriter;
diff --git a/tamer/src/obj/xmle/writer/writer.rs b/tamer/src/obj/xmle/writer/writer.rs
new file mode 100644
index 0000000..05d89f9
--- /dev/null
+++ b/tamer/src/obj/xmle/writer/writer.rs
@@ -0,0 +1,320 @@
+// xmle object file writer
+//
+// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+use crate::ir::asg::Object;
+use crate::sym::Symbol;
+use quick_xml::Error as XmlError;
+use std::io::{Error as IoError, Write};
+use std::result;
+use std::str::Utf8Error;
+
+type ObjectRef<'a, 'i> = &'a Object<'i>;
+pub type Result<T = ()> = result::Result<T, WriterError>;
+pub type ObjectVec<'a, 'i> = Vec<ObjectRef<'a, 'i>>;
+
+/// A wrapper around a `Write` object
+///
+/// This is used to take the [`Sections`] and write out the xmle files.
+pub trait Writer<W: Write> {
+ fn write(
+ &mut self,
+ sections: &Sections,
+ name: Symbol,
+ relroot: &str,
+ ) -> Result<()>
+ where
+ Self: Sized;
+}
+
+/// A Section that needs to be written to the buffer
+///
+/// Most sections will only need a `body`, but some innlude `head` and `tail`
+/// information. Rather than dealing with those differently, each `Section`
+/// will have a `head` and `tail` that are empty by default.
+#[derive(Clone, Default)]
+pub struct Section<'a, 'i> {
+ head: ObjectVec<'a, 'i>,
+ body: ObjectVec<'a, 'i>,
+ tail: ObjectVec<'a, 'i>,
+}
+
+impl<'a, 'i> Section<'a, 'i> {
+ /// Constructor for Sections
+ ///
+ /// ```
+ /// use tamer::obj::xmle::writer::Section;
+ ///
+ /// let section = Section::new();
+ /// ```
+ pub fn new() -> Self {
+ Self {
+ head: Vec::new(),
+ body: Vec::new(),
+ tail: Vec::new(),
+ }
+ }
+
+ /// The length of the `Section`
+ pub fn len(&self) -> usize {
+ self.head.len() + self.body.len() + self.tail.len()
+ }
+
+ /// Check if the `Section` is empty
+ pub fn is_empty(&self) -> bool {
+ self.len() == 0
+ }
+
+ /// Push an `Object` into a `Section`'s head
+ pub fn push_head(&mut self, obj: ObjectRef<'a, 'i>) {
+ self.head.push(&obj)
+ }
+
+ /// Push an `Object` into a `Section`'s body
+ pub fn push_body(&mut self, obj: ObjectRef<'a, 'i>) {
+ self.body.push(&obj)
+ }
+
+ /// Push an `Object` into a `Section`'s tail
+ pub fn push_tail(&mut self, obj: ObjectRef<'a, 'i>) {
+ self.tail.push(&obj)
+ }
+
+ /// Merge the parts of a `Section` into one [`SectionIterator`]
+ ///
+ /// The `Section` internals need to be iterated as a group so we needed to
+ /// create a custom iterator, [`SectionIterator`] to do this for us. This
+ /// method allows us to access the iterator.
+ ///
+ /// ```
+ /// use tamer::obj::xmle::writer::Section;
+ /// use tamer::ir::asg::Object;
+ ///
+ /// let mut section = Section::new();
+ /// let obj = Object::Empty;
+ /// let expect = vec![&obj, &obj, &obj];
+ ///
+ /// section.push_head(&obj);
+ /// section.push_body(&obj);
+ /// section.push_tail(&obj);
+ /// let section_iter = section.iter();
+ ///
+ /// for object in section_iter {
+ /// assert_eq!(&obj, object);
+ /// }
+ /// ```
+ pub fn iter(&self) -> SectionIterator {
+ SectionIterator {
+ inner: Box::new(
+ self.head
+ .iter()
+ .chain(self.body.iter())
+ .chain(self.tail.iter())
+ .copied(),
+ ),
+ }
+ }
+}
+
+/// Wrapper for an Iterator
+///
+/// This allows us to iterate over all parts of a [`Section`].
+pub struct SectionIterator<'a, 'i> {
+ inner: Box<dyn Iterator<Item = &'a Object<'i>> + 'a>,
+}
+
+impl<'a, 'i> Iterator for SectionIterator<'a, 'i> {
+ type Item = &'a Object<'i>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ self.inner.next()
+ }
+}
+
+/// Sections that need to be written to a buffer
+///
+/// All the properties are public [`Section`] objects and will be accessed
+/// directly by the [`Writer`].
+#[derive(Default)]
+pub struct Sections<'a, 'i> {
+ pub map: Section<'a, 'i>,
+ pub retmap: Section<'a, 'i>,
+ pub meta: Section<'a, 'i>,
+ pub worksheet: Section<'a, 'i>,
+ pub params: Section<'a, 'i>,
+ pub types: Section<'a, 'i>,
+ pub funcs: Section<'a, 'i>,
+ pub rater: Section<'a, 'i>,
+}
+
+impl<'a, 'i> Sections<'a, 'i> {
+ /// Constructor for Sections
+ ///
+ /// ```
+ /// use tamer::obj::xmle::writer::Sections;
+ ///
+ /// let sections = Sections::new();
+ /// ```
+ pub fn new() -> Self {
+ Self {
+ map: Section::new(),
+ retmap: Section::new(),
+ meta: Section::new(),
+ worksheet: Section::new(),
+ params: Section::new(),
+ types: Section::new(),
+ funcs: Section::new(),
+ rater: Section::new(),
+ }
+ }
+}
+
+/// Error implementations for the writer
+#[derive(Debug)]
+pub enum WriterError {
+ Io(IoError),
+ Utf8(Utf8Error),
+ XmlError(XmlError),
+ ExpectedFragment(String),
+}
+
+impl From<IoError> for WriterError {
+ fn from(err: IoError) -> Self {
+ WriterError::Io(err)
+ }
+}
+
+impl From<Utf8Error> for WriterError {
+ fn from(err: Utf8Error) -> Self {
+ WriterError::Utf8(err)
+ }
+}
+
+impl From<XmlError> for WriterError {
+ fn from(err: XmlError) -> Self {
+ WriterError::XmlError(err)
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+
+ #[test]
+ fn section_empty() {
+ let section = Section::new();
+
+ assert!(section.head.is_empty());
+ assert!(section.body.is_empty());
+ assert!(section.tail.is_empty());
+ }
+
+ #[test]
+ fn section_head() {
+ let mut section = Section::new();
+ let obj = Object::Empty;
+
+ assert!(section.head.is_empty());
+
+ section.push_head(&obj);
+
+ assert_eq!(Some(&&obj), section.head.get(0));
+ }
+
+ #[test]
+ fn section_body() {
+ let mut section = Section::new();
+ let obj = Object::Empty;
+
+ assert!(section.body.is_empty());
+
+ section.push_body(&obj);
+
+ let body = section.body;
+ assert_eq!(Some(&&obj), body.get(0));
+ }
+
+ #[test]
+ fn section_tail() {
+ let mut section = Section::new();
+ let obj = Object::Empty;
+
+ assert!(section.tail.is_empty());
+
+ section.push_tail(&obj);
+
+ assert_eq!(Some(&&obj), section.tail.get(0));
+ }
+
+ #[test]
+ fn section_len() {
+ let mut section = Section::new();
+ let obj = Object::Empty;
+
+ assert_eq!(0, section.len());
+ section.push_head(&obj);
+ assert_eq!(1, section.len());
+ section.push_body(&obj);
+ assert_eq!(2, section.len());
+ section.push_tail(&obj);
+ assert_eq!(3, section.len());
+ }
+
+ #[test]
+ fn section_is_empty_head() {
+ let mut section = Section::new();
+ let obj = Object::Empty;
+
+ assert!(section.is_empty());
+ section.push_head(&obj);
+ assert!(!section.is_empty());
+ }
+
+ #[test]
+ fn section_is_empty_body() {
+ let mut section = Section::new();
+ let obj = Object::Empty;
+
+ assert!(section.is_empty());
+ section.push_body(&obj);
+ assert!(!section.is_empty());
+ }
+
+ #[test]
+ fn section_is_empty_tail() {
+ let mut section = Section::new();
+ let obj = Object::Empty;
+
+ assert!(section.is_empty());
+ section.push_tail(&obj);
+ assert!(!section.is_empty());
+ }
+
+ #[test]
+ fn section_iterator() {
+ let mut section = Section::new();
+ let obj = Object::Empty;
+ let expect = vec![&obj, &obj, &obj];
+
+ section.push_head(&obj);
+ section.push_body(&obj);
+ section.push_tail(&obj);
+
+ let collection: Vec<_> = section.iter().collect();
+
+ assert_eq!(expect, collection);
+ }
+}
diff --git a/tamer/src/obj/xmle/writer/xmle.rs b/tamer/src/obj/xmle/writer/xmle.rs
new file mode 100644
index 0000000..fc659a6
--- /dev/null
+++ b/tamer/src/obj/xmle/writer/xmle.rs
@@ -0,0 +1,749 @@
+// Concrete xmle writer
+//
+// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+use super::writer::{Result, SectionIterator, Sections, WriterError};
+use crate::ir::asg::{IdentKind, Object};
+use crate::sym::Symbol;
+use fxhash::FxHashSet;
+#[cfg(test)]
+use mock::MockXmlWriter as XmlWriter;
+use quick_xml::events::{BytesEnd, BytesStart, BytesText, Event};
+#[cfg(not(test))]
+use quick_xml::Writer as XmlWriter;
+use std::io::Write;
+
+/// Responsible for writing to the xmle files
+pub struct XmleWriter<W: Write> {
+ writer: XmlWriter<W>,
+}
+
+impl<W: Write> XmleWriter<W> {
+ /// Create a new instance of `XmleWriter`
+ /// ```
+ /// use std::io::Cursor;
+ /// use tamer::obj::xmle::writer::XmleWriter;
+ ///
+ /// let writer = Cursor::new(Vec::new());
+ /// let xmle_writer = XmleWriter::new(writer);
+ /// ```
+ pub fn new(write: W) -> Self {
+ let writer = XmlWriter::new_with_indent(write, b' ', 2);
+
+ Self { writer }
+ }
+
+ /// Consume the `XmleWriter` and return the inner `Write` object
+ ///
+ /// ```
+ /// use std::io::Cursor;
+ /// use tamer::obj::xmle::writer::XmleWriter;
+ ///
+ /// let writer = Cursor::new(Vec::new());
+ /// let xmle_writer = XmleWriter::new(writer);
+ /// assert!(xmle_writer.into_inner().into_inner().is_empty());
+ /// ```
+ pub fn into_inner(self) -> W {
+ self.writer.into_inner()
+ }
+
+ /// Write xmle
+ ///
+ /// Goes through each of the pre-ordered [`Sections`] and writes to the
+ /// buffer.
+ ///
+ /// ```
+ /// use std::io::Cursor;
+ /// use tamer::obj::xmle::writer::{Sections, XmleWriter};
+ /// use tamer::sym::{Symbol, SymbolIndex};
+ /// use tamer::sym::{DefaultInterner, Interner};
+ ///
+ /// let writer = Cursor::new(Vec::new());
+ /// let mut xmle_writer = XmleWriter::new(writer);
+ /// let sections = Sections::new();
+ /// let a = "foo";
+ /// let interner = DefaultInterner::new();
+ /// let name = interner.intern(&a);
+ /// xmle_writer.write(
+ /// &sections,
+ /// &name,
+ /// &String::from(""),
+ /// );
+ /// let buf = xmle_writer.into_inner().into_inner();
+ /// assert!(!buf.is_empty(), "something was written to the buffer");
+ /// ```
+ pub fn write(
+ &mut self,
+ sections: &Sections,
+ name: &Symbol,
+ relroot: &str,
+ ) -> Result {
+ self.write_start_package(name, &relroot)?
+ .write_element(b"l:dep", |writer| {
+ writer.write_sections(&sections, &relroot)
+ })?
+ // This was not in the original linker, but we need to be able to
+ // convey this information for `standalones` (which has received
+ // some logic from the old linker for the time being).
+ .write_element(b"l:map-from", |writer| {
+ writer.write_froms(&sections)
+ })?
+ .write_element(b"l:map-exec", |writer| {
+ writer.write_section(sections.map.iter())
+ })?
+ .write_element(b"l:retmap-exec", |writer| {
+ writer.write_section(sections.retmap.iter())
+ })?
+ .write_element(b"l:static", |writer| {
+ writer
+ .write_section(sections.meta.iter())?
+ .write_section(sections.worksheet.iter())?
+ .write_section(sections.params.iter())?
+ .write_section(sections.types.iter())?
+ .write_section(sections.funcs.iter())
+ })?
+ .write_element(b"l:exec", |writer| {
+ writer.write_section(sections.rater.iter())
+ })?
+ .write_end_tag(b"package")?;
+
+ Ok(())
+ }
+
+ /// Write an element
+ ///
+ /// This writes the opening tag, the content, and the closing tag for a
+ /// given element. The callback is what will write the element's body.
+ #[inline]
+ fn write_element<F>(
+ &mut self,
+ name: &[u8],
+ callback: F,
+ ) -> Result<&mut XmleWriter<W>>
+ where
+ F: FnOnce(&mut Self) -> Result<&mut XmleWriter<W>>,
+ {
+ self.write_start_tag(name)?;
+ (callback)(self)?;
+ self.write_end_tag(name)?;
+
+ Ok(self)
+ }
+
+ /// Open the `package` element
+ ///
+ /// The `package` element's opening tag needs attributes, so it cannot use
+ /// `write_start_tag` directly.
+ fn write_start_package(
+ &mut self,
+ name: &Symbol,
+ relroot: &str,
+ ) -> Result<&mut XmleWriter<W>> {
+ let root =
+ BytesStart::owned_name(b"package".to_vec()).with_attributes(vec![
+ ("xmlns", "http://www.lovullo.com/rater"),
+ ("xmlns:preproc", "http://www.lovullo.com/rater/preproc"),
+ ("xmlns:l", "http://www.lovullo.com/rater/linker"),
+ ("title", &name), // TODO
+ ("program", "true"),
+ ("name", &name),
+ ("__rootpath", &relroot),
+ ]);
+
+ self.writer.write_event(Event::Start(root))?;
+
+ Ok(self)
+ }
+
+ /// Open an element's tag
+ fn write_start_tag(&mut self, name: &[u8]) -> Result<&mut XmleWriter<W>> {
+ self.writer
+ .write_event(Event::Start(BytesStart::borrowed_name(name)))?;
+
+ Ok(self)
+ }
+
+ /// Close an element's tag
+ fn write_end_tag(&mut self, name: &[u8]) -> Result<&mut XmleWriter<W>> {
+ self.writer
+ .write_event(Event::End(BytesEnd::borrowed(name)))?;
+
+ Ok(self)
+ }
+
+ /// Write all [`Sections`]
+ ///
+ /// All the [`Sections`] found need to be written out using the `writer`
+ /// object.
+ fn write_sections(
+ &mut self,
+ sections: &Sections,
+ relroot: &str,
+ ) -> Result<&mut XmleWriter<W>> {
+ let all = sections
+ .meta
+ .iter()
+ .chain(sections.map.iter())
+ .chain(sections.retmap.iter())
+ .chain(sections.worksheet.iter())
+ .chain(sections.params.iter())
+ .chain(sections.types.iter())
+ .chain(sections.funcs.iter())
+ .chain(sections.rater.iter());
+
+ for ident in all {
+ match ident {
+ Object::Ident(sym, kind, src)
+ | Object::IdentFragment(sym, kind, src, _) => {
+ let name: &str = sym;
+
+ // this'll be formalized more sanely
+ let mut attrs = match kind {
+ IdentKind::Cgen(dim) => {
+ vec![("type", "cgen"), ("dim", dim.as_ref())]
+ }
+ IdentKind::Class(dim) => {
+ vec![("type", "class"), ("dim", dim.as_ref())]
+ }
+ IdentKind::Const(dim, dtype) => vec![
+ ("type", "const"),
+ ("dim", dim.as_ref()),
+ ("dtype", dtype.as_ref()),
+ ],
+ IdentKind::Func(dim, dtype) => vec![
+ ("type", "func"),
+ ("dim", dim.as_ref()),
+ ("dtype", dtype.as_ref()),
+ ],
+ IdentKind::Gen(dim, dtype) => vec![
+ ("type", "gen"),
+ ("dim", dim.as_ref()),
+ ("dtype", dtype.as_ref()),
+ ],
+ IdentKind::Lparam(dim, dtype) => vec![
+ ("type", "lparam"),
+ ("dim", dim.as_ref()),
+ ("dtype", dtype.as_ref()),
+ ],
+ IdentKind::Param(dim, dtype) => vec![
+ ("type", "param"),
+ ("dim", dim.as_ref()),
+ ("dtype", dtype.as_ref()),
+ ],
+ IdentKind::Rate(dtype) => {
+ vec![("type", "rate"), ("dtype", dtype.as_ref())]
+ }
+ IdentKind::Tpl => vec![("type", "tpl")],
+ IdentKind::Type(dtype) => {
+ vec![("type", "type"), ("dtype", dtype.as_ref())]
+ }
+ IdentKind::MapHead => vec![("type", "map:head")],
+ IdentKind::Map => vec![("type", "map")],
+ IdentKind::MapTail => vec![("type", "map:tail")],
+ IdentKind::RetMapHead => vec![("type", "retmap:head")],
+ IdentKind::RetMap => vec![("type", "retmap")],
+ IdentKind::RetMapTail => vec![("type", "retmap:tail")],
+ IdentKind::Meta => vec![("type", "meta")],
+ IdentKind::Worksheet => vec![("type", "worksheet")],
+ };
+
+ attrs.push(("name", name));
+
+ if src.generated {
+ attrs.push(("preproc:generated", "true"));
+ }
+
+ let srcpath: String;
+ if let Some(pkg_name) = src.pkg_name {
+ srcpath = format!("{}{}", relroot, pkg_name);
+ attrs.push(("src", &srcpath));
+ }
+ if let Some(parent) = src.parent {
+ attrs.push(("parent", parent));
+ }
+ if let Some(yields) = src.yields {
+ attrs.push(("yields", yields));
+ }
+ if let Some(desc) = &src.desc {
+ attrs.push(("desc", &desc));
+ }
+
+ let sym = BytesStart::owned_name(b"preproc:sym".to_vec())
+ .with_attributes(attrs);
+
+ self.writer.write_event(Event::Empty(sym))?;
+ }
+ _ => unreachable!("filtered out during sorting"),
+ }
+ }
+
+ Ok(self)
+ }
+
+ /// Write the source `from`
+ ///
+ /// If a `map` object has a `from` attribute in its source, we need to
+ /// write them using the `writer`'s `write_event`.
+ fn write_froms(
+ &mut self,
+ sections: &Sections,
+ ) -> Result<&mut XmleWriter<W>> {
+ let mut map_froms: FxHashSet<&str> = Default::default();
+
+ let map_iter = sections.map.iter();
+
+ for map_ident in map_iter {
+ match map_ident {
+ Object::Ident(_, _, src)
+ | Object::IdentFragment(_, _, src, _) => {
+ if let Some(froms) = &src.from {
+ for from in froms {
+ map_froms.insert(from);
+ }
+ }
+ }
+ _ => unreachable!("filtered out during sorting"),
+ }
+ }
+
+ for from in map_froms {
+ let name: &str = from;
+
+ self.writer.write_event(Event::Empty(
+ BytesStart::borrowed_name(b"l:from")
+ .with_attributes(vec![("name", name)]),
+ ))?;
+ }
+
+ Ok(self)
+ }
+
+ /// Write a ['Section`]
+ ///
+ /// Iterates through the parts of a `Section` and writes them using the
+ /// `writer`'s 'write_event`.
+ fn write_section(
+ &mut self,
+ idents: SectionIterator,
+ ) -> Result<&mut XmleWriter<W>> {
+ for ident in idents {
+ match ident {
+ Object::IdentFragment(_, _, _, frag) => {
+ self.writer.write_event(Event::Text(
+ BytesText::from_plain_str(frag),
+ ))?;
+ }
+ // Cgen, Gen, and Lparam are not expected to be present, so we
+ // can ignore them when we determeing when to return an Err.
+ Object::Ident(_, IdentKind::Cgen(_), _)
+ | Object::Ident(_, IdentKind::Gen(_, _), _)
+ | Object::Ident(_, IdentKind::Lparam(_, _), _) => (),
+ obj => {
+ return Err(WriterError::ExpectedFragment(format!(
+ "fragment expected: {:?}",
+ obj
+ )));
+ }
+ }
+ }
+
+ Ok(self)
+ }
+}
+
+#[cfg(test)]
+mod mock {
+ use super::*;
+
+ pub struct MockXmlWriter<W: Write> {
+ inner: W,
+ pub write_callback: Option<Box<dyn for<'a> Fn(&Event<'a>) -> Result>>,
+ }
+
+ impl<W: Write> MockXmlWriter<W> {
+ pub fn new(inner: W) -> Self {
+ Self {
+ inner,
+ write_callback: None,
+ }
+ }
+
+ pub fn new_with_indent(inner: W, _: u8, _: u8) -> Self {
+ Self::new(inner)
+ }
+
+ pub fn write_event<'a, E: AsRef<Event<'a>>>(
+ &mut self,
+ event: E,
+ ) -> Result<usize> {
+ (self
+ .write_callback
+ .as_ref()
+ .expect("missing mock write_callback"))(
+ event.as_ref()
+ )?;
+
+ Ok(0)
+ }
+
+ pub fn into_inner(self) -> W {
+ self.inner
+ }
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+ use crate::ir::asg::Dim;
+ use crate::ir::asg::Source;
+ use crate::ir::legacyir::SymAttrs;
+ use crate::obj::xmle::writer::Section;
+ use crate::sym::{Symbol, SymbolIndex};
+ use std::str;
+
+ type Sut<W> = XmleWriter<W>;
+
+ #[test]
+ fn writer_uses_inner_buffer() -> Result {
+ let expected = vec![1, 2, 3];
+ let buf = expected.clone();
+
+ let sut = Sut::new(buf);
+
+ assert_eq!(expected, sut.into_inner());
+
+ Ok(())
+ }
+
+ #[test]
+ fn write_start_package() -> Result {
+ let mut sut = Sut::new(vec![]);
+ sut.writer.write_callback = Some(Box::new(|event| match event {
+ Event::Start(bytes_start) => {
+ let name = str::from_utf8(bytes_start.name());
+ match name {
+ Ok("package") => {
+ let attributes = bytes_start.attributes();
+ assert_eq!(7, attributes.count());
+ Ok(())
+ }
+ _ => panic!("unreachable"),
+ }
+ }
+ _ => panic!("did not match expected event"),
+ }));
+
+ let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "sym");
+
+ sut.write_start_package(&sym, &String::from(""))?;
+
+ Ok(())
+ }
+
+ #[test]
+ fn write_start_tag() -> Result {
+ let mut sut = Sut::new(vec![]);
+ sut.writer.write_callback = Some(Box::new(|event| match event {
+ Event::Start(bytes_start) => {
+ let name = str::from_utf8(bytes_start.name());
+ match name {
+ Ok("l:dep") => {
+ let attributes = bytes_start.attributes();
+ assert_eq!(0, attributes.count());
+ Ok(())
+ }
+ _ => panic!("unreachable"),
+ }
+ }
+ _ => panic!("did not match expected event"),
+ }));
+
+ sut.write_start_tag(b"l:dep")?;
+
+ Ok(())
+ }
+
+ #[test]
+ fn write_end_tag() -> Result {
+ let mut sut = Sut::new(vec![]);
+ sut.writer.write_callback = Some(Box::new(|event| match event {
+ Event::End(bytes_end) => {
+ let name = str::from_utf8(bytes_end.name());
+ assert_eq!("package", name?);
+ Ok(())
+ }
+ _ => panic!("did not match expected event"),
+ }));
+
+ sut.write_end_tag(b"package")?;
+
+ Ok(())
+ }
+
+ #[test]
+ fn write_section() -> Result {
+ let mut sut = Sut::new(vec![]);
+ sut.writer.write_callback = Some(Box::new(|event| match event {
+ Event::Text(_) => (Ok(())),
+ _ => panic!("did not trigger event"),
+ }));
+
+ let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "sym");
+ let obj = Object::IdentFragment(
+ &sym,
+ IdentKind::Meta,
+ Source::default(),
+ String::from(""),
+ );
+
+ let mut section = Section::new();
+ section.push_body(&obj);
+ sut.write_section(section.iter())?;
+
+ Ok(())
+ }
+
+ #[test]
+ fn write_section_ignores_other_kinds() -> Result {
+ let mut sut = Sut::new(vec![]);
+ sut.writer.write_callback = Some(Box::new(|_| {
+ panic!("callback should not have been called");
+ }));
+
+ let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "sym");
+ let obj = Object::Ident(
+ &sym,
+ IdentKind::Cgen(Dim::default()),
+ Source::default(),
+ );
+
+ let mut section = Section::new();
+ section.push_body(&obj);
+ sut.write_section(section.iter())?;
+
+ Ok(())
+ }
+
+ #[test]
+ fn write_section_catch_missing() -> Result {
+ let mut sut = Sut::new(vec![]);
+ sut.writer.write_callback = Some(Box::new(|_| {
+ panic!("callback should not have been called");
+ }));
+
+ let obj = Object::Empty;
+
+ let mut section = Section::new();
+ section.push_body(&obj);
+ let result = sut.write_section(section.iter());
+
+ match result {
+ Err(WriterError::ExpectedFragment(_)) => {}
+ _ => panic!("expected Err"),
+ }
+
+ Ok(())
+ }
+
+ #[test]
+ fn write_sections() -> Result {
+ let mut sut = Sut::new(vec![]);
+
+ sut.writer.write_callback = Some(Box::new(|event| match event {
+ Event::Empty(bytes_start) => {
+ let name = str::from_utf8(bytes_start.name())?;
+ assert_eq!("preproc:sym", name);
+ let mut attributes = bytes_start.attributes();
+ assert_eq!(2, attributes.clone().count());
+
+ let attr = attributes.next().expect("Expects attributes")?;
+ assert_eq!("type", str::from_utf8(attr.key)?);
+ assert_eq!("worksheet", str::from_utf8(&attr.value)?);
+
+ let attr = attributes.next().expect("Expects attributes")?;
+ assert_eq!("name", str::from_utf8(attr.key)?);
+ assert_eq!("random_symbol", str::from_utf8(&attr.value)?);
+
+ Ok(())
+ }
+ _ => panic!("unexpected event"),
+ }));
+
+ let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "random_symbol");
+ let object =
+ Object::Ident(&sym, IdentKind::Worksheet, Source::default());
+ let mut sections = Sections::new();
+ sections.map.push_body(&object);
+ sut.write_sections(&sections, &String::from(""))?;
+
+ Ok(())
+ }
+
+ #[test]
+ fn write_sections_with_sources() -> Result {
+ let mut sut = Sut::new(vec![]);
+
+ sut.writer.write_callback = Some(Box::new(|event| match event {
+ Event::Empty(bytes_start) => {
+ let name = str::from_utf8(bytes_start.name())?;
+ assert_eq!("preproc:sym", name);
+
+ let mut attributes = bytes_start.attributes();
+ assert_eq!(7, attributes.clone().count());
+
+ let attr = attributes.next().expect("Expects attributes")?;
+ assert_eq!("type", str::from_utf8(attr.key)?);
+ assert_eq!("worksheet", str::from_utf8(&attr.value)?);
+
+ let attr = attributes.next().expect("Expects attributes")?;
+ assert_eq!("name", str::from_utf8(attr.key)?);
+ assert_eq!("name", str::from_utf8(&attr.value)?);
+
+ let attr = attributes.next().expect("Expects attributes")?;
+ assert_eq!("preproc:generated", str::from_utf8(attr.key)?);
+ assert_eq!("true", str::from_utf8(&attr.value)?);
+
+ let attr = attributes.next().expect("Expects attributes")?;
+ assert_eq!("src", str::from_utf8(attr.key)?);
+ assert_eq!("rootname", str::from_utf8(&attr.value)?);
+
+ let attr = attributes.next().expect("Expects attributes")?;
+ assert_eq!("parent", str::from_utf8(attr.key)?);
+ assert_eq!("parent", str::from_utf8(&attr.value)?);
+
+ let attr = attributes.next().expect("Expects attributes")?;
+ assert_eq!("yields", str::from_utf8(attr.key)?);
+ assert_eq!("yields", str::from_utf8(&attr.value)?);
+
+ let attr = attributes.next().expect("Expects attributes")?;
+ assert_eq!("desc", str::from_utf8(attr.key)?);
+ assert_eq!("sym desc", str::from_utf8(&attr.value)?);
+
+ Ok(())
+ }
+ _ => panic!("unexpected event"),
+ }));
+
+ let nsym = Symbol::new_dummy(SymbolIndex::from_u32(1), "name");
+ let ssym = Symbol::new_dummy(SymbolIndex::from_u32(2), "src");
+ let psym = Symbol::new_dummy(SymbolIndex::from_u32(3), "parent");
+ let ysym = Symbol::new_dummy(SymbolIndex::from_u32(4), "yields");
+ let fsym = Symbol::new_dummy(SymbolIndex::from_u32(5), "from");
+
+ let attrs = SymAttrs {
+ pkg_name: Some(&nsym),
+ src: Some(&ssym),
+ generated: true,
+ parent: Some(&psym),
+ yields: Some(&ysym),
+ desc: Some("sym desc".to_string()),
+ from: Some(vec![&fsym]),
+ virtual_: true,
+ ..Default::default()
+ };
+ let object = Object::Ident(&nsym, IdentKind::Worksheet, attrs.into());
+ let mut sections = Sections::new();
+ sections.map.push_body(&object);
+ sut.write_sections(&sections, &String::from("root"))?;
+
+ Ok(())
+ }
+
+ #[test]
+ fn write_froms() -> Result {
+ let mut sut = Sut::new(vec![]);
+
+ sut.writer.write_callback = Some(Box::new(|event| match event {
+ Event::Empty(bytes_start) => {
+ let name = str::from_utf8(bytes_start.name())?;
+ assert_eq!("l:from", name);
+
+ let mut attributes = bytes_start.attributes();
+ assert_eq!(1, attributes.clone().count());
+
+ let attr = attributes.next().expect("Expects attributes")?;
+ assert_eq!("name", str::from_utf8(attr.key)?);
+ assert_eq!("dest symbol", str::from_utf8(&attr.value)?);
+
+ Ok(())
+ }
+ _ => panic!("unexpected event"),
+ }));
+
+ let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "source symbol");
+ let symb = Symbol::new_dummy(SymbolIndex::from_u32(2), "dest symbol");
+
+ let mut src = Source::default();
+ src.from = Some(vec![&symb]);
+ let object = Object::Ident(&sym, IdentKind::Worksheet, src);
+ let mut sections = Sections::new();
+ sections.map.push_body(&object);
+ sut.write_froms(&sections)?;
+
+ Ok(())
+ }
+
+ #[test]
+ fn write_froms_no_from_no_write() -> Result {
+ let mut sut = Sut::new(vec![]);
+
+ sut.writer.write_callback = Some(Box::new(|event| match event {
+ _ => panic!("unexpected write"),
+ }));
+
+ let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "random_symbol");
+
+ let object =
+ Object::Ident(&sym, IdentKind::Worksheet, Source::default());
+ let mut sections = Sections::new();
+ sections.map.push_body(&object);
+ sut.write_froms(&sections)?;
+
+ Ok(())
+ }
+
+ #[test]
+ fn write_element() -> Result {
+ let mut sut = Sut::new(vec![]);
+
+ sut.writer.write_callback = Some(Box::new(|event| match event {
+ Event::Start(bytes) => {
+ let name = str::from_utf8(bytes.name());
+ match name {
+ Ok("foo") => {
+ let attributes = bytes.attributes();
+ assert_eq!(0, attributes.count());
+ Ok(())
+ }
+ _ => panic!("unreachable"),
+ }
+ }
+ Event::End(bytes) => {
+ let name = str::from_utf8(bytes.name());
+ match name {
+ Ok("foo") => Ok(()),
+ _ => panic!("unreachable"),
+ }
+ }
+ _ => panic!("did not match expected event"),
+ }));
+
+ sut.write_element(b"foo", |writer| Ok(writer))?;
+
+ Ok(())
+ }
+}
diff --git a/tamer/src/obj/xmlo/mod.rs b/tamer/src/obj/xmlo/mod.rs
new file mode 100644
index 0000000..8c38c28
--- /dev/null
+++ b/tamer/src/obj/xmlo/mod.rs
@@ -0,0 +1,75 @@
+// xmlo object files
+//
+// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+//! `xmlo` object file construction and processing.
+//!
+//! This object file format exists for compatibility with the old compiler
+//! written in XSLT;
+//! it will be removed in the future.
+//!
+//!
+//! `xmlo` Object Files
+//! ===================
+//! An `xmlo` object file is produced by the for each source file.
+//! It is a terribly inefficient object format and will be eliminated in the
+//! future.
+//! The format is XML because the original compiler was written in XSLT.
+//!
+//! The general structure of an `xmlo` file consists of:
+//! - Package metadata as attributes on the root node;
+//! - A symbol table along with symbol metadata;
+//! - Symbol dependencies (as [adjacency lists][]);
+//! - Compiled JavaScript fragments for each applicable symbol; and
+//! - Expanded source XML.
+//!
+//! [adjacency lists]: https://en.wikipedia.org/wiki/Adjacency_list
+//!
+//! For example (with some extra information omitted):
+//!
+//! ```xml
+//! <package xmlns="http://www.lovullo.com/rater"
+//! xmlns:preproc="http://www.lovullo.com/rater/preproc"
+//! title="Example Package"
+//! name="example/package"
+//! __rootpath="../"
+//! preproc:elig-class-yields="isEligexamplepackage">
+//! <!-- Symbol table -->
+//! <preproc:symtable>
+//! <preproc:sym name=":class:some-sym" type="class" ... />
+//! <!-- ... -->
+//! </preproc:symtable>
+//!
+//! <!-- Dependency graph (adjacency lists) -->
+//! <preproc:sym-deps>
+//! <preproc:sym-dep name=":class:some-sym">
+//! <preproc:sym-ref name="someOtherSym" />
+//! <!-- ... -->
+//! </preproc:sym-dep>
+//! </preproc:sym-deps>
+//!
+//! <!-- Compiled JS fragments -->
+//! <preproc:fragments>
+//! <preproc:fragment id=":class:some-sym">
+//! classes['some-sym'] = '...generated JS code...';
+//! </preproc:fragment>
+//! </preproc:fragments>
+//!
+//! <!-- Expanded src -->
+//! </package>
+//! ```
+
+pub mod reader;
diff --git a/tamer/src/obj/xmlo/reader.rs b/tamer/src/obj/xmlo/reader.rs
new file mode 100644
index 0000000..3f33ddd
--- /dev/null
+++ b/tamer/src/obj/xmlo/reader.rs
@@ -0,0 +1,1787 @@
+// xmlo object file reader
+//
+// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+//! `xmlo` object file reader.
+//!
+//! This defines a lower-level event-based [`XmloReader`] similar to that of
+//! [`quick_xml`] (see [`XmloEvent`]),
+//! where the events are a slightly higher-level abstraction over the
+//! types of nodes present in the file.
+//!
+//! _Note that a "symbol" in the `xmlo` sense differs slightly from
+//! [`Symbol`];_
+//! the former is more akin to an identifier.
+//!
+//! For more information on `xmlo` files,
+//! see the [parent crate][super].
+//!
+//!
+//! How To Use
+//! ==========
+//! The event-based API for [`XmloReader`] is similar to that of
+//! [`quick_xml`],
+//! except that the [`XmloResult`] produces
+//! [Legacy IR](crate::ir::legacyir).
+//! There is minor overhead incurred from parsing if the emitted events are
+//! not used,
+//! but it is quite minimal.
+//! The only lifetime one has to worry about is the lifetime of the
+//! [`Interner`] used to produce symbols.
+//!
+//! The next [`XmloEvent`] is retrieved using [`XmloReader::read_event`].
+//! _You should stop reading at [`XmloEvent::Eoh`];_
+//! reading the remainder of the object file has not yet been implemented.
+//!
+//! ```
+//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
+//! use tamer::obj::xmlo::reader::{XmloReader, XmloEvent};
+//! use tamer::ir::legacyir::SymType;
+//! use tamer::sym::{DefaultInterner, Interner};
+//!
+//! let xmlo = br#"<package name="foo">
+//! <preproc:symtable>
+//! <preproc:sym name="syma" type="class" />
+//! <preproc:sym name="symb" type="cgen" />
+//! </preproc:symtable>
+//!
+//! <preproc:sym-deps>
+//! <preproc:sym-dep name="syma">
+//! <preproc:sym-ref name="depa-1" />
+//! <preproc:sym-ref name="depa-2" />
+//! </preproc:sym-dep>
+//! <preproc:sym-dep name="symb">
+//! <preproc:sym-ref name="depb-1" />
+//! </preproc:sym-dep>
+//! </preproc:sym-deps>
+//!
+//! <preproc:fragments>
+//! <preproc:fragment id="syma">syma text</preproc:fragment>
+//! <preproc:fragment id="symb">symb text</preproc:fragment>
+//! </preproc:fragments>
+//! </package>"#;
+//!
+//! let interner = DefaultInterner::new();
+//! let mut reader = XmloReader::new(xmlo as &[u8], &interner);
+//!
+//! let mut pkgname = None;
+//! let mut syms = Vec::new();
+//! let mut deps = Vec::new();
+//! let mut fragments = Vec::new();
+//!
+//! loop {
+//! match reader.read_event()? {
+//! XmloEvent::Package(attrs) => pkgname = attrs.name,
+//! XmloEvent::SymDecl(sym, attrs) => syms.push((sym, attrs.ty)),
+//! XmloEvent::SymDeps(sym, symdeps) => deps.push((sym, symdeps)),
+//! XmloEvent::Fragment(sym, text) => fragments.push((sym, text)),
+//!
+//! // Do not read past end of header.
+//! XmloEvent::Eoh => break,
+//! }
+//! }
+//!
+//! assert_eq!(Some(interner.intern("foo")), pkgname);
+//!
+//! assert_eq!(
+//! vec![
+//! (interner.intern("syma"), Some(SymType::Class)),
+//! (interner.intern("symb"), Some(SymType::Cgen)),
+//! ],
+//! syms
+//! );
+//!
+//! assert_eq!(
+//! vec![
+//! (interner.intern("syma"), vec![
+//! interner.intern("depa-1"),
+//! interner.intern("depa-2"),
+//! ]),
+//! (interner.intern("symb"), vec![
+//! interner.intern("depb-1"),
+//! ]),
+//! ],
+//! deps
+//! );
+//!
+//! assert_eq!(
+//! vec![
+//! (interner.intern("syma"), "syma text".into()),
+//! (interner.intern("symb"), "symb text".into()),
+//! ],
+//! fragments
+//! );
+//!
+//! # Ok(())
+//! # }
+//! ```
+
+use crate::ir::legacyir::{PackageAttrs, SymAttrs, SymType};
+use crate::sym::{Interner, Symbol};
+#[cfg(test)]
+use crate::test::quick_xml::MockBytesStart as BytesStart;
+#[cfg(test)]
+use crate::test::quick_xml::MockXmlEvent as XmlEvent;
+#[cfg(test)]
+use mock::MockXmlReader as XmlReader;
+#[cfg(not(test))]
+use quick_xml::events::BytesStart;
+#[cfg(not(test))]
+use quick_xml::events::Event as XmlEvent;
+use quick_xml::Error as XmlError;
+#[cfg(not(test))]
+use quick_xml::Reader as XmlReader;
+use std::convert::TryInto;
+use std::fmt::Display;
+use std::io::BufRead;
+use std::result::Result;
+
+/// A [`Result`] with a hard-coded [`XmloError`] error type.
+///
+/// This is the result of every [`XmloReader`] operation that could
+/// potentially fail in error.
+pub type XmloResult<T> = Result<T, XmloError>;
+
+/// Wrapper around [`quick_xml::Reader`] for reading and parsing `xmlo`
+/// object files.
+///
+/// This reader performs interning (see [`Interner`]) for data that is
+/// expected to be duplicated or compared.
+/// Other data are converted into more concise representations where
+/// possible,
+/// or are returned as owned [`String`] values otherwise,
+/// with the understanding that values will be persisted within an IR
+/// anyway.
+/// This reader stores symbol attributes in the Legacy IR's [`SymAttrs`].
+///
+/// See [module-level documentation](self) for more information and
+/// examples.
+pub struct XmloReader<'i, B, I>
+where
+ B: BufRead,
+ I: Interner<'i>,
+{
+ /// Source `xmlo` reader.
+ reader: XmlReader<B>,
+
+ /// Internal buffer for [`XmlReader`].
+ buffer: Vec<u8>,
+
+ /// Another internal buffer for [`XmlReader`].
+ ///
+ /// This buffer exists to work around ownership rules.
+ /// TODO: It this worth removing? If not, remove this TODO.
+ sub_buffer: Vec<u8>,
+
+ /// String internment system.
+ interner: &'i I,
+
+ /// Whether the root has been validated.
+ ///
+ /// This is used to ensure that we provide an error early on if we try
+ /// to process something that isn't a package.
+ seen_root: bool,
+
+ /// Name of the package currently being read.
+ ///
+ /// This is known after processing the root `package` element,
+ /// provided that it's a proper root node.
+ pkg_name: Option<&'i Symbol<'i>>,
+}
+
+impl<'i, B: BufRead, I: Interner<'i>> XmloReader<'i, B, I> {
+ /// Construct a new reader.
+ pub fn new(reader: B, interner: &'i I) -> Self {
+ let mut reader = XmlReader::from_reader(reader);
+
+ // xmlo files are compiler output and should be trusted
+ reader.check_end_names(false);
+
+ Self {
+ reader,
+ // TODO: option to accept buffer
+ buffer: Vec::new(),
+ sub_buffer: Vec::new(),
+ interner,
+ seen_root: false,
+ pkg_name: None,
+ }
+ }
+
+ /// Continue reading and produce the next event.
+ ///
+ /// An [`XmloEvent::Eoh`] event is emitted at the end of the header
+ /// (at the closing `preproc:fragment` node).
+ ///
+ /// Stack Warning
+ /// =============
+ /// The source file will be read until an event can be produced.
+ /// This is recursive on the underlying [`XmlReader::read_event`],
+ /// and Rust dues not (at the time of writing) support tail call
+ /// optimization.
+ /// This shouldn't be a concern for proper `xmlo` files as long as you
+ /// acknowledge [`XmloEvent::Eoh`] and do not continue reading
+ /// further.
+ ///
+ /// Errors
+ /// ======
+ /// - Any of [`XmloError`].
+ /// See private methods for more information.
+ ///
+ /// TODO: Augment failures with context
+ pub fn read_event<'a>(&mut self) -> XmloResult<XmloEvent<'i>> {
+ let event = self.reader.read_event(&mut self.buffer)?;
+
+ // Ensure that the first encountered node is something we expect
+ if !self.seen_root {
+ match &event {
+ // We don't process namespaces, so we have to guess what
+ // they may be (map xmlo files differ, for example)
+ XmlEvent::Start(ele) => {
+ if !(ele.name() == b"package"
+ || ele.name() == b"lv:package")
+ {
+ return Err(XmloError::UnexpectedRoot);
+ }
+
+ self.seen_root = true;
+ }
+ _ => return self.read_event(),
+ }
+ }
+
+ match event {
+ XmlEvent::Empty(ele) if ele.name() == b"preproc:sym" => {
+ Self::process_sym(&self.pkg_name, &ele, self.interner)
+ }
+
+ XmlEvent::Start(ele) => match ele.name() {
+ b"package" | b"lv:package" => {
+ let attrs = Self::process_package(&ele, self.interner)?;
+
+ self.pkg_name = attrs.name;
+
+ Ok(XmloEvent::Package(attrs))
+ }
+
+ b"preproc:sym-dep" => Self::process_dep(
+ &ele,
+ self.interner,
+ &mut self.reader,
+ &mut self.sub_buffer,
+ ),
+
+ b"preproc:fragment" => Self::process_fragment(
+ &ele,
+ self.interner,
+ &mut self.reader,
+ &mut self.sub_buffer,
+ ),
+
+ // `func` symbols include additional data for param
+ // ordering, which we don't care about. But `map` includes
+ // source field information which we want to keep. (We
+ // don't care about `retmap` for our purposes.)
+ b"preproc:sym" => {
+ let mut event =
+ Self::process_sym(&self.pkg_name, &ele, self.interner)?;
+
+ match &mut event {
+ XmloEvent::SymDecl(_, attrs)
+ if attrs.ty == Some(SymType::Map) =>
+ {
+ attrs.from = Some(Self::process_map_from(
+ self.interner,
+ &mut self.reader,
+ &mut self.sub_buffer,
+ )?);
+
+ Ok(event)
+ }
+ _ => {
+ self.reader.read_to_end(
+ ele.name(),
+ &mut self.sub_buffer,
+ )?;
+
+ Ok(event)
+ }
+ }
+ }
+
+ // Just like the outer match, recurse
+ _ => self.read_event(),
+ },
+
+ XmlEvent::End(ele) if ele.name() == b"preproc:fragments" => {
+ Ok(XmloEvent::Eoh)
+ }
+
+ // Ignore and recurse, looking for something we can process
+ _ => self.read_event(),
+ }
+ }
+
+ /// Process `lv:package` element attributes.
+ ///
+ /// The result is an [`XmloEvent::Package`] containing each applicable
+ /// attribute,
+ /// parsed.
+ fn process_package<'a>(
+ ele: &'a BytesStart<'a>,
+ interner: &'i I,
+ ) -> XmloResult<PackageAttrs<'i>> {
+ let mut program = false;
+ let mut elig: Option<&'i Symbol<'i>> = None;
+ let mut name: Option<&'i Symbol<'i>> = None;
+ let mut relroot: Option<String> = None;
+
+ for attr in ele.attributes().with_checks(false).filter_map(Result::ok) {
+ match attr.key {
+ b"name" => {
+ name = Some(unsafe {
+ interner.intern_utf8_unchecked(&attr.value)
+ });
+ }
+
+ b"__rootpath" => {
+ relroot = Some(unsafe {
+ String::from_utf8_unchecked(attr.value.to_vec())
+ });
+ }
+
+ b"program" => {
+ program = &*attr.value == b"true";
+ }
+
+ b"preproc:elig-class-yields" => {
+ elig = Some(unsafe {
+ interner.intern_utf8_unchecked(&attr.value)
+ });
+ }
+
+ _ => (),
+ }
+ }
+
+ // TODO: proper errors, no panic
+ Ok(PackageAttrs {
+ name,
+ relroot,
+ program,
+ elig,
+ ..Default::default()
+ })
+ }
+
+ /// Process `preproc:sym` element attributes.
+ ///
+ /// The symbol name `preproc:sym/@name` is interned.
+ /// All other known attributes are parsed
+ /// and unknown attributes are ignored.
+ ///
+ /// The result is a single [`XmloEvent::SymDecl`] with an interned
+ /// `preproc:sym/@name`.
+ ///
+ /// Errors
+ /// ======
+ /// - [`XmloError::UnassociatedSym`] if missing `preproc:sym/@name`.
+ fn process_sym<'a>(
+ pkg_name: &Option<&'i Symbol<'i>>,
+ ele: &'a BytesStart<'a>,
+ interner: &'i I,
+ ) -> XmloResult<XmloEvent<'i>> {
+ let mut name: Option<&'i Symbol<'i>> = None;
+ let mut sym_attrs = SymAttrs::default();
+
+ for attr in ele.attributes().with_checks(false).filter_map(Result::ok) {
+ match attr.key {
+ b"name" => {
+ name = Some(unsafe {
+ interner.intern_utf8_unchecked(&attr.value)
+ });
+ }
+
+ b"src" => {
+ sym_attrs.src = Some(unsafe {
+ interner.intern_utf8_unchecked(&attr.value)
+ });
+ }
+
+ b"type" => {
+ sym_attrs.ty =
+ Some((*attr.value).try_into().map_err(|_| {
+ XmloError::InvalidType(unsafe {
+ String::from_utf8_unchecked(attr.value.to_vec())
+ })
+ })?);
+ }
+
+ b"dim" => {
+ sym_attrs.dim = Some(Self::dim_to_u8(&attr.value)?);
+ }
+
+ b"dtype" => {
+ sym_attrs.dtype =
+ Some((*attr.value).try_into().map_err(|_| {
+ XmloError::InvalidDtype(unsafe {
+ String::from_utf8_unchecked(attr.value.to_vec())
+ })
+ })?);
+ }
+
+ b"extern" => {
+ sym_attrs.extern_ = &*attr.value == b"true";
+ }
+
+ b"preproc:generated" => {
+ sym_attrs.generated = &*attr.value == b"true";
+ }
+
+ b"parent" => {
+ sym_attrs.parent = Some(unsafe {
+ interner.intern_utf8_unchecked(&attr.value)
+ });
+ }
+
+ b"yields" => {
+ sym_attrs.yields = Some(unsafe {
+ interner.intern_utf8_unchecked(&attr.value)
+ });
+ }
+
+ b"desc" => {
+ sym_attrs.desc = Some(unsafe {
+ String::from_utf8_unchecked(attr.value.to_vec())
+ });
+ }
+
+ b"virtual" => {
+ sym_attrs.virtual_ = &*attr.value == b"true";
+ }
+
+ b"isoverride" => {
+ sym_attrs.override_ = &*attr.value == b"true";
+ }
+
+ // As this reader evolves, we may wish to provide an error
+ // for unknown attributes so that we can be sure that we've
+ // handled them all.
+ _ => (),
+ }
+ }
+
+ sym_attrs.pkg_name = *pkg_name;
+
+ name.map(|name_sym| XmloEvent::SymDecl(name_sym, sym_attrs))
+ .ok_or(XmloError::UnassociatedSym)
+ }
+
+ /// Process `preproc:from` for `preproc:sym[@type="map"]` elements.
+ ///
+ /// Map symbols contain additional information describing source
+ /// inputs external to the system.
+ ///
+ /// Errors
+ /// ======
+ /// - [`XmloError::InvalidMapFrom`] if `@name` missing or if unexpected
+ /// data (e.g. elements) are encountered.
+ /// - [`XmloError::XmlError`] on XML parsing failure.
+ fn process_map_from<'a>(
+ interner: &'i I,
+ reader: &mut XmlReader<B>,
+ buffer: &mut Vec<u8>,
+ ) -> XmloResult<Vec<&'i Symbol<'i>>> {
+ let mut froms = Vec::new();
+
+ loop {
+ match reader.read_event(buffer)? {
+ XmlEvent::Empty(ele) if ele.name() == b"preproc:from" => froms
+ .push(
+ ele.attributes()
+ .with_checks(false)
+ .filter_map(Result::ok)
+ .find(|attr| attr.key == b"name")
+ .map_or(
+ Err(XmloError::InvalidMapFrom(
+ "preproc:from/@name missing".into(),
+ )),
+ |attr| {
+ Ok(unsafe {
+ interner
+ .intern_utf8_unchecked(&attr.value)
+ })
+ },
+ )?,
+ ),
+
+ XmlEvent::End(ele) if ele.name() == b"preproc:sym" => break,
+
+ // Note that whitespace counts as text
+ XmlEvent::Text(_) => (),
+
+ _ => Err(XmloError::InvalidMapFrom("unexpected data".into()))?,
+ };
+ }
+
+ Ok(froms)
+ }
+
+ /// Process `preproc:sym-dep` element.
+ ///
+ /// This represents an adjacency list for a given identifier in the
+ /// dependency graph.
+ /// The structure of this element is looks like this:
+ ///
+ /// ```xml
+ /// <preproc:sym-dep name=":class:some-sym">
+ /// <preproc:sym-ref name="someOtherSym" />
+ /// <!-- ... -->
+ /// </preproc:sym-dep>
+ /// ```
+ ///
+ /// This function will read any number of `preproc:sym-ref` nodes and
+ /// produce a single [`XmloEvent::SymDeps`] containing a [`Symbol`]
+ /// for `preproc:sym-dep/@name` and for each `preproc:sym-ref/@name`.
+ ///
+ /// Errors
+ /// ======
+ /// - [`XmloError::UnassociatedSymDep`] if missing `preproc:sym-dep/@name`.
+ /// - [`XmloError::MalformedSymRef`] if missing `preproc:sym-ref/@name`
+ /// or if any `preproc:sym-dep/node()` is not a `prepreoc:sym-ref`.
+ /// - [`XmloError::XmlError`] on XML parsing failure.
+ fn process_dep<'a>(
+ ele: &'a BytesStart<'a>,
+ interner: &'i I,
+ reader: &mut XmlReader<B>,
+ buffer: &mut Vec<u8>,
+ ) -> XmloResult<XmloEvent<'i>> {
+ let name = ele
+ .attributes()
+ .with_checks(false)
+ .filter_map(Result::ok)
+ .find(|attr| attr.key == b"name")
+ .map_or(Err(XmloError::UnassociatedSymDep), |attr| {
+ Ok(unsafe { interner.intern_utf8_unchecked(&attr.value) })
+ })?;
+
+ let mut deps = Vec::new();
+
+ loop {
+ match reader.read_event(buffer)? {
+ XmlEvent::Empty(symref)
+ if symref.name() == b"preproc:sym-ref" =>
+ {
+ deps.push(
+ symref
+ .attributes()
+ .with_checks(false)
+ .filter_map(Result::ok)
+ .find(|attr| attr.key == b"name")
+ .map_or(
+ Err(XmloError::MalformedSymRef(
+ "preproc:sym-ref/@name missing".into(),
+ )),
+ |attr| {
+ Ok(unsafe {
+ interner.intern_utf8_unchecked(&attr.value)
+ })
+ },
+ )?,
+ );
+ }
+
+ // We assume that elements are properly nested, so this must
+ // be the closing preproc:sym-dep tag.
+ XmlEvent::End(_) => break,
+
+ // Note that whitespace counts as text
+ XmlEvent::Text(_) => (),
+
+ _ => return Err(XmloError::MalformedSymRef(format!(
+ "preproc:sym-dep must contain only preproc:sym-ref children for `{}`",
+ name,
+ )))
+ }
+ }
+
+ Ok(XmloEvent::SymDeps(name, deps))
+ }
+
+ /// Process `preproc:fragment` element.
+ ///
+ /// This element represents the compiled code for the given symbol.
+ /// The result is a single [`XmloEvent::Fragment`] with an owned
+ /// [`String`] fragment and an interned `preproc:fragment/@id`.
+ ///
+ /// Errors
+ /// ======
+ /// - [`XmloError::UnassociatedFragment`] if missing `preproc:fragment/@id`.
+ /// - [`XmloError::MissingFragmentText`] if missing
+ /// `preproc:fragment/text()`.
+ /// - [`XmloError::XmlError`] for XML parsing errors.
+ fn process_fragment<'a>(
+ ele: &'a BytesStart<'a>,
+ interner: &'i I,
+ reader: &mut XmlReader<B>,
+ buffer: &mut Vec<u8>,
+ ) -> XmloResult<XmloEvent<'i>> {
+ let mut src_attrs = ele.attributes();
+ let mut filtered = src_attrs.with_checks(false).filter_map(Result::ok);
+
+ let id = filtered
+ .find(|attr| attr.key == b"id")
+ .filter(|attr| &*attr.value != b"")
+ .map_or(Err(XmloError::UnassociatedFragment), |attr| {
+ Ok(unsafe { interner.intern_utf8_unchecked(&attr.value) })
+ })?;
+
+ let text =
+ reader
+ .read_text(ele.name(), buffer)
+ .map_err(|err| match err {
+ XmlError::TextNotFound => {
+ XmloError::MissingFragmentText(id.to_string())
+ }
+ err => XmloError::XmlError(err),
+ })?;
+
+ Ok(XmloEvent::Fragment(id, text))
+ }
+
+ /// Convert single-character `@dim` to a [`u8`].
+ ///
+ /// Errors
+ /// ======
+ /// - [`XmloError::InvalidDim`] if first character is not an ASCII
+ /// digit,
+ /// or if there is more than one character.
+ fn dim_to_u8(value: &[u8]) -> XmloResult<u8> {
+ // Technically this could allow for incorrect inputs (we only take
+ // the first character)
+ if value.len() > 1 || !value[0].is_ascii_digit() {
+ return Err(XmloError::InvalidDim(unsafe {
+ String::from_utf8_unchecked(value.to_vec())
+ }));
+ }
+
+ Ok(value[0] - b'0')
+ }
+}
+
+/// `xmlo` reader events.
+///
+/// All data are parsed rather than being returned as [`u8`] slices,
+/// which avoids having to deal with awkward borrows or data copying since
+/// these data will likely be persisted in memory anyway.
+///
+/// To avoid extra data copying,
+/// we should instead prefer not to put data into object files that won't
+/// be useful and can't be easily skipped without parsing.
+#[derive(Debug, PartialEq, Eq)]
+pub enum XmloEvent<'i> {
+ /// Package declaration.
+ ///
+ /// This contains data gathered from the root `lv:package` node.
+ Package(PackageAttrs<'i>),
+
+ /// Symbol declaration.
+ ///
+ /// This represents an entry in the symbol table,
+ /// which includes a symbol along with its variable metadata as
+ /// [`SymAttrs`].
+ SymDecl(&'i Symbol<'i>, SymAttrs<'i>),
+
+ /// Dependencies of a given symbol.
+ ///
+ /// Note that, for simplicity, an owned vector is returned rather than a
+ /// slice into an internal buffer.
+ SymDeps(&'i Symbol<'i>, Vec<&'i Symbol<'i>>),
+
+ /// Text (compiled code) fragment for a given symbol.
+ ///
+ /// This contains the compiler output for a given symbol,
+ /// and is returned here as an owned value.
+ /// Given that fragments can be quite large,
+ /// a caller not interested in these data should choose to skip
+ /// fragments entirely rather than simply ignoring fragment events.
+ Fragment(&'i Symbol<'i>, String),
+
+ /// End-of-header.
+ ///
+ /// The header of an `xmlo` file is defined as the symbol table;
+ /// dependency list; and fragments.
+ /// This event is emitted at the closing `preproc:fragment` node.
+ Eoh,
+}
+
+/// Error during `xmlo` processing.
+///
+/// Errors contain only owned values rather than references to original
+/// data since they represent conditions requiring termination from
+/// malformed compiler output,
+/// and so should rarely occur.
+/// This drastically simplifies the reader and [`Result`] chaining.
+///
+/// TODO: These errors provide no context (byte offset).
+#[derive(Debug)]
+pub enum XmloError {
+ /// XML parsing error.
+ XmlError(XmlError),
+ /// The root node was not an `lv:package`.
+ UnexpectedRoot,
+ /// A `preproc:sym` node was found, but is missing `@name`.
+ UnassociatedSym,
+ /// The provided `preproc:sym/@type` is unknown or invalid.
+ InvalidType(String),
+ /// The provided `preproc:sym/@dtype` is unknown or invalid.
+ InvalidDtype(String),
+ /// The provided `preproc:sym/@dim` is invalid.
+ InvalidDim(String),
+ /// A `preproc:sym-dep` element was found, but is missing `@name`.
+ UnassociatedSymDep,
+ /// The `preproc:sym[@type="map"]` contains unexpected or invalid data.
+ InvalidMapFrom(String),
+ /// Invalid dependency in adjacency list
+ /// (`preproc:sym-dep/preproc:sym-ref`).
+ MalformedSymRef(String),
+ /// A `preproc:fragment` element was found, but is missing `@id`.
+ UnassociatedFragment,
+ /// A `preproc:fragment` element was found, but is missing `text()`.
+ MissingFragmentText(String),
+}
+
+impl From<XmlError> for XmloError {
+ fn from(e: XmlError) -> Self {
+ XmloError::XmlError(e)
+ }
+}
+
+impl Display for XmloError {
+ fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
+ match self {
+ XmloError::XmlError(e) => e.fmt(fmt),
+ XmloError::UnexpectedRoot => {
+ write!(fmt, "unexpected package root (is this a package?)")
+ }
+ XmloError::UnassociatedSym => write!(
+ fmt,
+ "unassociated symbol table entry: preproc:sym/@name missing"
+ ),
+ XmloError::InvalidType(ty) => {
+ write!(fmt, "invalid preproc:sym/@type `{}`", ty)
+ }
+ XmloError::InvalidDtype(dtype) => {
+ write!(fmt, "invalid preproc:sym/@dtype `{}`", dtype)
+ }
+ XmloError::InvalidDim(dim) => {
+ write!(fmt, "invalid preproc:sym/@dim `{}`", dim)
+ }
+ XmloError::InvalidMapFrom(msg) => {
+ write!(fmt, "invalid preproc:sym[@type=\"map\"]: {}", msg)
+ }
+ XmloError::UnassociatedSymDep => write!(
+ fmt,
+ "unassociated dependency list: preproc:sym-dep/@name missing"
+ ),
+ XmloError::MalformedSymRef(msg) => {
+ write!(fmt, "malformed dependency ref: {}", msg)
+ }
+ XmloError::UnassociatedFragment => write!(
+ fmt,
+ "unassociated fragment: preproc:fragment/@id missing"
+ ),
+ XmloError::MissingFragmentText(symname) => write!(
+ fmt,
+ "fragment found, but missing text for symbol `{}`",
+ symname,
+ ),
+ }
+ }
+}
+
+impl std::error::Error for XmloError {
+ fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+ match self {
+ Self::XmlError(e) => Some(e),
+ _ => None,
+ }
+ }
+}
+
+#[cfg(test)]
+mod mock {
+ use super::*;
+ use quick_xml::Result as XmlResult;
+
+ pub struct MockXmlReader<B: BufRead> {
+ _reader: B,
+
+ pub check_end: Option<bool>,
+
+ /// Closure yielding the next event for `read_event`.
+ ///
+ /// This exists exclusively to avoid adding a lifetime parameter to
+ /// the mock when providing stub data.
+ /// A closure must be set before calling `read_event` to avoid a
+ /// panic.
+ pub next_event: Option<
+ Box<dyn for<'a> Fn(&'a mut Vec<u8>, u8) -> XmlResult<XmlEvent<'a>>>,
+ >,
+
+ pub event_i: u8,
+
+ /// Next string to yield for a text node.
+ pub next_text: Option<XmlResult<String>>,
+
+ pub given_text_ele: Option<String>,
+
+ pub read_to_end_name: Option<String>,
+ }
+
+ impl<B: BufRead> MockXmlReader<B> {
+ pub fn from_reader(reader: B) -> Self {
+ Self {
+ _reader: reader,
+ check_end: None,
+ next_event: None,
+ event_i: 0,
+ next_text: None,
+ given_text_ele: None,
+ read_to_end_name: None,
+ }
+ }
+
+ pub fn check_end_names(&mut self, val: bool) -> &mut Self {
+ self.check_end = Some(val);
+ self
+ }
+
+ pub fn read_event<'a, 'b>(
+ &'a mut self,
+ buf: &'b mut Vec<u8>,
+ ) -> XmlResult<XmlEvent<'b>> {
+ let result =
+ (self.next_event.as_ref().expect("missing mock next_event"))(
+ buf,
+ self.event_i,
+ );
+
+ self.event_i += 1;
+ result
+ }
+
+ pub fn read_text<K: AsRef<[u8]>>(
+ &mut self,
+ end: K,
+ _buf: &mut Vec<u8>,
+ ) -> XmlResult<String> {
+ self.given_text_ele =
+ Some(String::from_utf8(end.as_ref().to_vec()).unwrap());
+
+ self.next_text.take().expect("missing mock next_text")
+ }
+
+ pub fn read_to_end<K: AsRef<[u8]>>(
+ &mut self,
+ end: K,
+ _buf: &mut Vec<u8>,
+ ) -> XmlResult<()> {
+ self.read_to_end_name =
+ Some(String::from_utf8(end.as_ref().to_vec()).unwrap());
+
+ Ok(())
+ }
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+ use crate::ir::legacyir::{SymDtype, SymType};
+ use crate::sym::DefaultInterner;
+ use crate::test::quick_xml::*;
+
+ type Sut<'i, B, I> = XmloReader<'i, B, I>;
+
+ macro_rules! xmlo_tests {
+ ($(fn $fn:ident($sut:ident, $interner:ident) $body:block)*) => {
+ $(
+ #[test]
+ fn $fn() -> XmloResult<()> {
+ let stub_data: &[u8] = &[];
+ let $interner = DefaultInterner::new();
+
+ #[allow(unused_mut)]
+ let mut $sut = Sut::new(stub_data, &$interner);
+
+ // We don't want to have to output a proper root node
+ // for every one of our tests.
+ $sut.seen_root = true;
+ $sut.pkg_name = Some($interner.intern("pkg/name"));
+
+ $body;
+
+ Ok(())
+ }
+ )*
+ };
+ }
+
+ xmlo_tests! {
+ fn sets_parsing_options(sut, interner) {
+ assert_eq!(Some(false), sut.reader.check_end);
+ }
+
+ fn proxies_xml_failures(sut, interner) {
+ sut.reader.next_event =
+ Some(Box::new(|_, _| Err(XmlError::UnexpectedEof("test".into()))));
+
+ match sut.read_event() {
+ Err(XmloError::XmlError(XmlError::UnexpectedEof(_))) => (),
+ bad => panic!("expected XmlError: {:?}", bad),
+ }
+ }
+
+ fn sym_fails_without_name(sut, interner) {
+ sut.reader.next_event = Some(Box::new(|_, _| {
+ Ok(XmlEvent::Start(MockBytesStart::new(
+ b"preproc:sym",
+ Some(MockAttributes::new(vec![])),
+ )))
+ }));
+
+ match sut.read_event() {
+ Err(XmloError::UnassociatedSym) => (),
+ bad => panic!("expected XmloError::UnassociatedSym: {:?}", bad),
+ }
+ }
+
+ fn fails_on_invalid_root(sut, interner) {
+ // xmlo_tests macro sets this for us, so we need to clear it to
+ // be able to perform the check
+ sut.seen_root = false;
+
+ sut.reader.next_event = Some(Box::new(|_, _| {
+ Ok(XmlEvent::Start(MockBytesStart::new(
+ b"not-a-valid-package-node",
+ Some(MockAttributes::new(vec![])),
+ )))
+ }));
+
+ match sut.read_event() {
+ Err(XmloError::UnexpectedRoot) => (),
+ bad => panic!("expected XmloError: {:?}", bad),
+ }
+ }
+
+ fn recognizes_valid_roots(sut, interner) {
+ // xmlo_tests macro sets this for us, so we need to clear it to
+ // be able to perform the check
+ sut.seen_root = false;
+
+ // First valid root
+ sut.reader.next_event = Some(Box::new(|_, _| {
+ Ok(XmlEvent::Start(MockBytesStart::new(
+ b"package",
+ Some(MockAttributes::new(vec![])),
+ )))
+ }));
+
+ // Will fail if the above is not valid. See below for actually
+ // testing the package node.
+ sut.read_event()?;
+
+ // We don't process namespaces (to slow) so we have to handle
+ // the difference explicitly.
+ sut.seen_root = false;
+ sut.reader.next_event = Some(Box::new(|_, _| {
+ Ok(XmlEvent::Start(MockBytesStart::new(
+ b"lv:package",
+ Some(MockAttributes::new(vec![])),
+ )))
+ }));
+
+ sut.read_event()?;
+ }
+
+ fn package_event_program(sut, interner) {
+ sut.reader.next_event = Some(Box::new(|_, _| {
+ Ok(XmlEvent::Start(MockBytesStart::new(
+ b"package",
+ Some(MockAttributes::new(vec![
+ MockAttribute::new(b"program", b"true"),
+ MockAttribute::new(
+ b"preproc:elig-class-yields", b"eligClassYields",
+ ),
+ ])),
+ )))
+ }));
+
+ let result = sut.read_event()?;
+
+ assert_eq!(
+ XmloEvent::Package(PackageAttrs {
+ program: true,
+ elig: Some(interner.intern("eligClassYields")),
+ ..Default::default()
+ }),
+ result
+ );
+ }
+
+ fn package_event_nonprogram(sut, interner) {
+ sut.reader.next_event = Some(Box::new(|_, _| {
+ Ok(XmlEvent::Start(MockBytesStart::new(
+ b"package",
+ Some(MockAttributes::new(vec![])),
+ )))
+ }));
+
+ let result = sut.read_event()?;
+
+ assert_eq!(
+ XmloEvent::Package(PackageAttrs {
+ program: false,
+ ..Default::default()
+ }),
+ result
+ );
+ }
+
+ fn package_event_name(sut, interner) {
+ sut.reader.next_event = Some(Box::new(|_, _| {
+ Ok(XmlEvent::Start(MockBytesStart::new(
+ b"package",
+ Some(MockAttributes::new(vec![
+ MockAttribute::new(b"name", b"pkg/name"),
+ MockAttribute::new(b"__rootpath", b"../../"),
+ ])),
+ )))
+ }));
+
+ let result = sut.read_event()?;
+
+ assert_eq!(
+ XmloEvent::Package(PackageAttrs {
+ name: Some(interner.intern("pkg/name")),
+ relroot: Some("../../".into()),
+ program: false,
+ ..Default::default()
+ }),
+ result
+ );
+ }
+
+ fn sym_dep_event(sut, interner) {
+ sut.reader.next_event = Some(Box::new(|_, event_i| match event_i {
+ 0 => Ok(XmlEvent::Start(MockBytesStart::new(
+ b"preproc:sym-dep",
+ Some(MockAttributes::new(vec![MockAttribute::new(
+ b"name", b"depsym",
+ )])),
+ ))),
+ 1 => Ok(XmlEvent::Empty(MockBytesStart::new(
+ b"preproc:sym-ref",
+ Some(MockAttributes::new(vec![MockAttribute::new(
+ b"name", b"dep1",
+ )])),
+ ))),
+ 2 => Ok(XmlEvent::Empty(MockBytesStart::new(
+ b"preproc:sym-ref",
+ Some(MockAttributes::new(vec![MockAttribute::new(
+ b"name", b"dep2",
+ )])),
+ ))),
+ 3 => Ok(XmlEvent::End(MockBytesEnd::new(b"preproc:sym-dep"))),
+ _ => Err(XmlError::UnexpectedEof(
+ format!("MockXmlReader out of events: {}", event_i).into(),
+ )),
+ }));
+
+ let result = sut.read_event()?;
+
+ assert_eq!(
+ XmloEvent::SymDeps(
+ interner.intern("depsym"),
+ vec![interner.intern("dep1"), interner.intern("dep2")]
+ ),
+ result
+ );
+ }
+
+ fn sym_dep_fails_with_missing_name(sut, interner) {
+ sut.reader.next_event = Some(Box::new(|_, _| {
+ Ok(XmlEvent::Start(MockBytesStart::new(
+ b"preproc:sym-dep",
+ Some(MockAttributes::new(vec![])),
+ )))
+ }));
+
+ match sut.read_event() {
+ Err(XmloError::UnassociatedSymDep) => (),
+ bad => panic!("expected XmloError: {:?}", bad),
+ }
+ }
+
+ fn sym_dep_malformed_ref_missing_name(sut, interner) {
+ sut.reader.next_event = Some(Box::new(|_, event_i| match event_i {
+ 0 => Ok(XmlEvent::Start(MockBytesStart::new(
+ b"preproc:sym-dep",
+ Some(MockAttributes::new(vec![MockAttribute::new(
+ b"name", b"depsymbad",
+ )])),
+ ))),
+ // no attributes
+ 1 => Ok(XmlEvent::Empty(MockBytesStart::new(
+ b"preproc:sym-ref",
+ Some(MockAttributes::new(vec![])),
+ ))),
+ _ => Err(XmlError::UnexpectedEof(
+ format!("MockXmlReader out of events: {}", event_i).into(),
+ )),
+ }));
+
+ match sut.read_event() {
+ Err(XmloError::MalformedSymRef(msg)) => {
+ assert!(msg.contains("preproc:sym-ref/@name"))
+ },
+ bad => panic!("expected XmloError: {:?}", bad),
+ }
+ }
+
+ fn sym_dep_malformed_ref_unexpected_element(sut, interner) {
+ sut.reader.next_event = Some(Box::new(|_, event_i| match event_i {
+ 0 => Ok(XmlEvent::Start(MockBytesStart::new(
+ b"preproc:sym-dep",
+ Some(MockAttributes::new(vec![MockAttribute::new(
+ b"name", b"depsym-unexpected",
+ )])),
+ ))),
+ // text is okay (e.g. whitespace)
+ 1 => Ok(XmlEvent::Text(MockBytesText::new(
+ b" ",
+ ))),
+ // unexpected (not a preproc:sym-ref)
+ 2 => Ok(XmlEvent::Empty(MockBytesStart::new(
+ b"preproc:unexpected",
+ Some(MockAttributes::new(vec![])),
+ ))),
+ _ => Err(XmlError::UnexpectedEof(
+ format!("MockXmlReader out of events: {}", event_i).into(),
+ )),
+ }));
+
+ match sut.read_event() {
+ Err(XmloError::MalformedSymRef(msg)) => {
+ assert!(msg.contains("depsym-unexpected"))
+ },
+ bad => panic!("expected XmloError: {:?}", bad),
+ }
+
+ // We should have gotten past the text
+ assert_eq!(3, sut.reader.event_i, "Did not ignore Text");
+ }
+
+ fn eoh_after_fragments(sut, interner) {
+ sut.reader.next_event = Some(Box::new(|_, _| {
+ Ok(XmlEvent::End(MockBytesEnd::new(b"preproc:fragments")))
+ }));
+
+ let result = sut.read_event()?;
+
+ assert_eq!(XmloEvent::Eoh, result);
+ }
+
+ fn fragment_event(sut, interner) {
+ let expected = "fragment text".to_string();
+ sut.reader.next_text = Some(Ok(expected.clone()));
+
+ sut.reader.next_event = Some(Box::new(|_, _| {
+ Ok(XmlEvent::Start(MockBytesStart::new(
+ b"preproc:fragment",
+ Some(MockAttributes::new(vec![MockAttribute::new(
+ b"id", b"fragsym",
+ )])),
+ )))
+ }));
+
+ let result = sut.read_event()?;
+
+ assert_eq!(
+ XmloEvent::Fragment(interner.intern("fragsym"), expected),
+ result
+ );
+
+ // argument provided to underlying read_text
+ assert_eq!(
+ Some("preproc:fragment".to_string()),
+ sut.reader.given_text_ele
+ );
+ }
+
+ fn fragment_fails_with_missing_id(sut, interner) {
+ sut.reader.next_event = Some(Box::new(|_, _| {
+ Ok(XmlEvent::Start(MockBytesStart::new(
+ b"preproc:fragment",
+ Some(MockAttributes::new(vec![])),
+ )))
+ }));
+
+ match sut.read_event() {
+ Err(XmloError::UnassociatedFragment) => (),
+ bad => panic!("expected XmloError: {:?}", bad),
+ }
+ }
+
+ // Yes, this happened.
+ fn fragment_fails_with_empty_id(sut, interner) {
+ sut.reader.next_event = Some(Box::new(|_, _| {
+ Ok(XmlEvent::Start(MockBytesStart::new(
+ b"preproc:fragment",
+ Some(MockAttributes::new(vec![MockAttribute::new(
+ b"id", b"",
+ )])),
+ )))
+ }));
+
+ match sut.read_event() {
+ Err(XmloError::UnassociatedFragment) => (),
+ bad => panic!("expected XmloError: {:?}", bad),
+ }
+ }
+
+ fn fragment_fails_with_missing_text(sut, interner) {
+ sut.reader.next_text = Some(Err(XmlError::TextNotFound));
+
+ sut.reader.next_event = Some(Box::new(|_, _| {
+ Ok(XmlEvent::Start(MockBytesStart::new(
+ b"preproc:fragment",
+ Some(MockAttributes::new(vec![MockAttribute::new(
+ b"id", b"fragsym",
+ )])),
+ )))
+ }));
+
+ match sut.read_event() {
+ Err(XmloError::MissingFragmentText(symname)) => {
+ assert_eq!("fragsym".to_string(), symname)
+ }
+ bad => panic!("expected XmloError: {:?}", bad),
+ }
+ }
+
+ fn skips_unneeded_nodes(sut, interner) {
+ sut.reader.next_event = Some(Box::new(|_, event_i| match event_i {
+ // Skip over this
+ 0 => Ok(XmlEvent::End(MockBytesEnd::new(
+ b"preproc:ignore-me",
+ ))),
+
+ // And this
+ 1 => Ok(XmlEvent::Start(MockBytesStart::new(
+ b"preproc:symtable",
+ Some(MockAttributes::new(vec![])),
+ ))),
+
+ // But process this
+ 2 => Ok(XmlEvent::Empty(MockBytesStart::new(
+ b"preproc:sym",
+ Some(MockAttributes::new(vec![MockAttribute::new(
+ b"name", b"sym-expected",
+ )])),
+ ))),
+
+ _ => Err(XmlError::UnexpectedEof(
+ format!("MockXmlReader out of events: {}", event_i).into(),
+ )),
+ }));
+
+ let result = sut.read_event()?;
+
+ assert_eq!(
+ XmloEvent::SymDecl(
+ interner.intern("sym-expected"),
+ SymAttrs {
+ pkg_name: Some(interner.intern("pkg/name")),
+ ..Default::default()
+ },
+ ),
+ result
+ );
+ }
+
+ // Some preproc:sym nodes have children (`func` symbols,
+ // specifically) that we choose to ignore. See next test for
+ // data we do care about.
+ fn sym_nonempty_element(sut, interner) {
+ sut.reader.next_event = Some(Box::new(|_, _| {
+ // Notice Start, not Empty
+ Ok(XmlEvent::Start(MockBytesStart::new(
+ b"preproc:sym",
+ Some(MockAttributes::new(vec![
+ MockAttribute::new(
+ b"name", b"sym-nonempty",
+ ),
+ // Just to observe that processing works properly
+ MockAttribute::new(
+ b"dim", b"2",
+ ),
+ ])),
+ )))
+ }));
+
+ let result = sut.read_event()?;
+
+ assert_eq!(
+ XmloEvent::SymDecl(
+ interner.intern("sym-nonempty"),
+ SymAttrs {
+ dim: Some(2),
+ pkg_name: Some(interner.intern("pkg/name")),
+ ..Default::default()
+ },
+ ),
+ result
+ );
+
+ // Ensure that we have skipped the remainder of this element
+ // (all of its children) so that the next event will yield the
+ // next symbol.
+ assert_eq!(Some("preproc:sym".into()), sut.reader.read_to_end_name);
+ }
+
+ // `map` symbols include information about their source
+ // fields.
+ fn sym_map_from(sut, interner) {
+ sut.reader.next_event = Some(Box::new(|_, event_i| match event_i {
+ // Notice Start, not Empty
+ 0 => Ok(XmlEvent::Start(MockBytesStart::new(
+ b"preproc:sym",
+ Some(MockAttributes::new(vec![
+ MockAttribute::new(
+ b"name", b"sym-map-from",
+ ),
+ MockAttribute::new(
+ b"type", b"map",
+ ),
+ ])),
+ ))),
+
+ 1 => Ok(XmlEvent::Empty(MockBytesStart::new(
+ b"preproc:from",
+ Some(MockAttributes::new(vec![
+ MockAttribute::new(
+ b"name", b"from-a",
+ ),
+ ])),
+ ))),
+
+ // make sure that whitespace is permitted
+ 2 => Ok(XmlEvent::Text(MockBytesText::new(
+ b" ",
+ ))),
+
+ 3 => Ok(XmlEvent::Empty(MockBytesStart::new(
+ b"preproc:from",
+ Some(MockAttributes::new(vec![
+ MockAttribute::new(
+ b"name", b"from-b",
+ ),
+ ])),
+ ))),
+
+ 4 => Ok(XmlEvent::End(MockBytesEnd::new(
+ b"preproc:sym",
+ ))),
+
+ _ => Err(XmlError::UnexpectedEof(
+ format!("MockXmlReader out of events: {}", event_i).into(),
+ )),
+ }));
+
+ let result = sut.read_event()?;
+
+ assert_eq!(
+ XmloEvent::SymDecl(
+ interner.intern("sym-map-from"),
+ SymAttrs {
+ ty: Some(SymType::Map),
+ from: Some(vec![
+ interner.intern("from-a"),
+ interner.intern("from-b"),
+ ]),
+ pkg_name: Some(interner.intern("pkg/name")),
+ ..Default::default()
+ },
+ ),
+ result
+ );
+
+ // Should _not_ have read to the end.
+ assert_eq!(None, sut.reader.read_to_end_name);
+ }
+
+ fn sym_map_from_missing_name(sut, interner) {
+ sut.reader.next_event = Some(Box::new(|_, event_i| match event_i {
+ // Notice Start, not Empty
+ 0 => Ok(XmlEvent::Start(MockBytesStart::new(
+ b"preproc:sym",
+ Some(MockAttributes::new(vec![
+ MockAttribute::new(
+ b"name", b"sym-map-from-bad",
+ ),
+ MockAttribute::new(
+ b"type", b"map",
+ ),
+ ])),
+ ))),
+
+ // missing @name
+ 1 => Ok(XmlEvent::Empty(MockBytesStart::new(
+ b"preproc:from",
+ Some(MockAttributes::new(vec![])),
+ ))),
+
+ 2 => Ok(XmlEvent::End(MockBytesEnd::new(
+ b"preproc:sym",
+ ))),
+
+ _ => Err(XmlError::UnexpectedEof(
+ format!("MockXmlReader out of events: {}", event_i).into(),
+ )),
+ }));
+
+ match sut.read_event() {
+ Err(XmloError::InvalidMapFrom(msg)) => {
+ assert!(msg.contains("preproc:from"))
+ }
+ bad => panic!("expected XmloError: {:?}", bad),
+ }
+ }
+
+ fn sym_map_from_unexpected_data(sut, interner) {
+ sut.reader.next_event = Some(Box::new(|_, event_i| match event_i {
+ // Notice Start, not Empty
+ 0 => Ok(XmlEvent::Start(MockBytesStart::new(
+ b"preproc:sym",
+ Some(MockAttributes::new(vec![
+ MockAttribute::new(
+ b"name", b"sym-map-from-bad",
+ ),
+ MockAttribute::new(
+ b"type", b"map",
+ ),
+ ])),
+ ))),
+
+ // garbage
+ 1 => Ok(XmlEvent::Empty(MockBytesStart::new(
+ b"preproc:nonsense",
+ Some(MockAttributes::new(vec![])),
+ ))),
+
+ _ => Err(XmlError::UnexpectedEof(
+ format!("MockXmlReader out of events: {}", event_i).into(),
+ )),
+ }));
+
+ match sut.read_event() {
+ Err(XmloError::InvalidMapFrom(_)) => (),
+ bad => panic!("expected XmloError: {:?}", bad),
+ }
+ }
+ }
+
+ macro_rules! sym_test_reader_event {
+ ($sut:ident, $interner:ident, $name:ident, $($key:ident=$val:literal),*) => {
+ // See xmlo_tests macro for explanation
+ $sut.seen_root = true;
+
+ $sut.reader.next_event = Some(Box::new(|_, event_i| {
+ match event_i {
+ 0 => Ok(XmlEvent::Start(MockBytesStart::new(
+ b"package",
+ Some(MockAttributes::new(vec![
+ MockAttribute::new(b"name", b"pkg/name")
+ ])),
+ ))),
+
+ 1 => Ok(XmlEvent::Empty(MockBytesStart::new(
+ b"preproc:sym",
+ Some(MockAttributes::new(
+ vec![
+ MockAttribute::new(
+ b"name",
+ stringify!($name).as_bytes(),
+ ),
+ $(
+ MockAttribute::new(
+ stringify!($key).as_bytes(),
+ $val.as_bytes(),
+ ),
+ )*
+ ],
+ )),
+ ))),
+
+ _ => Err(XmlError::UnexpectedEof(
+ format!("MockXmlReader out of events: {}", event_i).into(),
+ )),
+ }
+ }));
+
+ // consume the package to set the name
+ let _ = $sut.read_event();
+ }
+ }
+
+ macro_rules! sym_tests {
+ (($interner:ident) $($name:ident: [$($key:ident=$val:literal),*] => $expect:expr)*) => {
+ $(
+ #[test]
+ fn $name() -> XmloResult<()> {
+ let stub_data: &[u8] = &[];
+ let $interner = DefaultInterner::new();
+ let mut sut = Sut::new(stub_data, &$interner);
+
+ sym_test_reader_event!(sut, $interner, $name, $( $key=$val ),*);
+
+ let result = sut.read_event()?;
+
+ let mut expected_attrs = $expect;
+ expected_attrs.pkg_name = Some($interner.intern("pkg/name"));
+
+ assert_eq!(
+ XmloEvent::SymDecl(
+ $interner.intern(stringify!($name)),
+ expected_attrs
+ ),
+ result
+ );
+ Ok(())
+ }
+ )*
+ }
+ }
+
+ sym_tests! {
+ (interner)
+
+ src: [src="foo/bar/baz"] => SymAttrs {
+ // see macro for src relpath
+ src: Some(interner.intern("foo/bar/baz")),
+ ..Default::default()
+ }
+
+ // note that this doesn't test every type; we're not going to
+ // duplicate the mapping for all of them here
+ tycgen: [type="cgen"] => SymAttrs {
+ ty: Some(SymType::Cgen),
+ ..Default::default()
+ }
+
+ dim_0: [dim="0"] => SymAttrs {
+ dim: Some(0),
+ ..Default::default()
+ }
+
+ dim_1: [dim="1"] => SymAttrs {
+ dim: Some(1),
+ ..Default::default()
+ }
+
+ dtyboolean: [dtype="boolean"] => SymAttrs {
+ dtype: Some(SymDtype::Boolean),
+ ..Default::default()
+ }
+
+ dtyinteger: [dtype="integer"] => SymAttrs {
+ dtype: Some(SymDtype::Integer),
+ ..Default::default()
+ }
+
+ dtyfloat: [dtype="float"] => SymAttrs {
+ dtype: Some(SymDtype::Float),
+ ..Default::default()
+ }
+
+ dtyempty: [dtype="empty"] => SymAttrs {
+ dtype: Some(SymDtype::Empty),
+ ..Default::default()
+ }
+
+ extern_true: [extern="true"] => SymAttrs {
+ extern_: true,
+ ..Default::default()
+ }
+
+ // The compiler will never produce nonsense values, so we'll just
+ // provide a sane default rather than adding extra checks (and
+ // hopefully we don't regret this)
+ extern_crap: [extern="nonsense"] => SymAttrs {
+ extern_: false,
+ ..Default::default()
+ }
+
+ parent: [parent="foo"] => SymAttrs {
+ parent: Some(interner.intern("foo")),
+ ..Default::default()
+ }
+
+ yields: [yields="yield"] => SymAttrs {
+ yields: Some(interner.intern("yield")),
+ ..Default::default()
+ }
+
+ desc: [desc="Description"] => SymAttrs {
+ desc: Some("Description".to_string()),
+ ..Default::default()
+ }
+
+ r#virtual: [virtual="true"] => SymAttrs {
+ virtual_: true,
+ ..Default::default()
+ }
+
+ r#override: [isoverride="true"] => SymAttrs {
+ override_: true,
+ ..Default::default()
+ }
+
+ // Multiple attributes at once
+ multi: [src="foo", type="class", dim="1", dtype="float", extern="true"]
+ => SymAttrs {
+ // see macro for src relpath
+ src: Some(interner.intern("foo")),
+ ty: Some(SymType::Class),
+ dim: Some(1),
+ dtype: Some(SymDtype::Float),
+ extern_: true,
+ ..Default::default()
+ }
+ }
+
+ // can't be tested using the above
+ #[test]
+ fn generated_true() -> XmloResult<()> {
+ let stub_data: &[u8] = &[];
+ let interner = DefaultInterner::new();
+ let mut sut = Sut::new(stub_data, &interner);
+
+ // See xmlo_tests macro for explanation
+ sut.seen_root = true;
+ sut.pkg_name = Some(interner.intern("pkg/name"));
+
+ sut.reader.next_event = Some(Box::new(|_, _| {
+ Ok(XmlEvent::Empty(MockBytesStart::new(
+ b"preproc:sym",
+ Some(MockAttributes::new(vec![
+ MockAttribute::new(b"name", b"generated_true"),
+ MockAttribute::new(b"preproc:generated", b"true"),
+ ])),
+ )))
+ }));
+
+ let result = sut.read_event()?;
+
+ let expected_attrs = SymAttrs {
+ generated: true,
+ pkg_name: Some(interner.intern("pkg/name")),
+ ..Default::default()
+ };
+
+ assert_eq!(
+ XmloEvent::SymDecl(
+ interner.intern("generated_true"),
+ expected_attrs
+ ),
+ result
+ );
+
+ Ok(())
+ }
+
+ #[test]
+ fn fails_on_non_ascii_dim() {
+ let stub_data: &[u8] = &[];
+ let interner = DefaultInterner::new();
+ let mut sut = Sut::new(stub_data, &interner);
+
+ sym_test_reader_event!(sut, interner, fail_sym, dim = "X1");
+
+ match sut.read_event() {
+ Err(XmloError::InvalidDim(msg)) => assert!(msg.contains("X1")),
+ bad => panic!("expected failure: {:?}", bad),
+ }
+ }
+
+ #[test]
+ fn fails_on_multi_char_dim() {
+ let stub_data: &[u8] = &[];
+ let interner = DefaultInterner::new();
+ let mut sut = Sut::new(stub_data, &interner);
+
+ sym_test_reader_event!(sut, interner, fail_sym, dim = "11");
+
+ match sut.read_event() {
+ Err(XmloError::InvalidDim(msg)) => assert!(msg.contains("11")),
+ bad => panic!("expected failure: {:?}", bad),
+ }
+ }
+
+ #[test]
+ fn fails_on_invalid_type() {
+ let stub_data: &[u8] = &[];
+ let interner = DefaultInterner::new();
+ let mut sut = Sut::new(stub_data, &interner);
+
+ sym_test_reader_event!(sut, interner, fail_sym, type = "foo");
+
+ match sut.read_event() {
+ Err(XmloError::InvalidType(msg)) => assert!(msg.contains("foo")),
+ bad => panic!("expected failure: {:?}", bad),
+ }
+ }
+
+ #[test]
+ fn fails_on_invalid_dtype() {
+ let stub_data: &[u8] = &[];
+ let interner = DefaultInterner::new();
+ let mut sut = Sut::new(stub_data, &interner);
+
+ sym_test_reader_event!(sut, interner, fail_sym, dtype = "foo");
+
+ match sut.read_event() {
+ Err(XmloError::InvalidDtype(msg)) => assert!(msg.contains("foo")),
+ bad => panic!("expected failure: {:?}", bad),
+ }
+ }
+
+ #[test]
+ fn fails_when_missing_sym_name() {
+ let stub_data: &[u8] = &[];
+ let interner = DefaultInterner::new();
+ let mut sut = Sut::new(stub_data, &interner);
+
+ sym_test_reader_event!(sut, interner, fail_sym, dtype = "foo");
+
+ match sut.read_event() {
+ Err(XmloError::InvalidDtype(msg)) => assert!(msg.contains("foo")),
+ bad => panic!("expected failure: {:?}", bad),
+ }
+ }
+}
diff --git a/tamer/src/sym.rs b/tamer/src/sym.rs
new file mode 100644
index 0000000..14618e6
--- /dev/null
+++ b/tamer/src/sym.rs
@@ -0,0 +1,826 @@
+// String internment
+//
+// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+//! String internment system.
+//!
+//! Interned strings are represented by [`Symbol`],
+//! created by an [`Interner`]:
+//~
+//! - [`ArenaInterner`] - Intern pool backed by an [arena][] for fast
+//! and stable allocation.
+//! - [`DefaultInterner`] - The currently recommended intern pool
+//! configuration for symbol interning.
+//! - [`FxArenaInterner`] - Intern pool backed by an [arena][] using the
+//! [Fx Hash][fxhash] hashing algorithm.
+//!
+//! Interners return symbols by reference which allows for `O(1)` comparison
+//! by pointer.
+//!
+//! [arena]: bumpalo
+//!
+//! ```
+//! use tamer::sym::{Interner, DefaultInterner, Symbol, SymbolIndex};
+//!
+//! // Inputs to be interned
+//! let a = "foo";
+//! let b = &"foo".to_string();
+//! let c = "foobar";
+//! let d = &c[0..3];
+//!
+//! // Interners employ interior mutability and so do not need to be
+//! // declared `mut`
+//! let interner = DefaultInterner::new();
+//!
+//! let (ia, ib, ic, id) = (
+//! interner.intern(a),
+//! interner.intern(b),
+//! interner.intern(c),
+//! interner.intern(d),
+//! );
+//!
+//! assert_eq!(ia, ib);
+//! assert_eq!(ia, id);
+//! assert_eq!(ib, id);
+//! assert_ne!(ia, ic);
+//!
+//! // All interns can be cloned and clones are eq
+//! assert_eq!(*ia, ia.clone());
+//!
+//! // Only "foo" and "foobar" are interned
+//! assert_eq!(2, interner.len());
+//! assert!(interner.contains("foo"));
+//! assert!(interner.contains("foobar"));
+//! assert!(!interner.contains("something else"));
+//!
+//! // Each symbol has an associated, densely-packed integer value
+//! // that can be used for indexing
+//! assert_eq!(SymbolIndex::from_u32(1), ia.index());
+//! assert_eq!(SymbolIndex::from_u32(1), ib.index());
+//! assert_eq!(SymbolIndex::from_u32(2), ic.index());
+//! assert_eq!(SymbolIndex::from_u32(1), id.index());
+//! ```
+//!
+//! What Is String Interning?
+//! =========================
+//! _[String interning][]_ is a process by which a single copy of a string
+//! is stored immutably in memory as part of a _pool_.
+//! When the same string is later encountered,
+//! a reference to the string in the pool is used rather than allocating a
+//! new string.
+//! Interned strings are typically referred to as "symbols" or "atoms".
+//!
+//! String comparison then amounts to comparing pointers (`O(1)`)
+//! rather than having to scan the string (`O(n)`).
+//! There is, however, a hashing cost of interning strings,
+//! as well as looking up strings in the intern pool.
+//!
+//! [string interning]: https://en.wikipedia.org/wiki/String_interning
+//!
+//!
+//! Internment Mechanism
+//! ====================
+//! The current [`DefaultInterner`] is [`FxArenaInterner`],
+//! which is an [arena][]-allocated intern pool mapped by the
+//! [Fx Hash][fxhash] hash function:
+//!
+//! 1. Strings are compared against the existing intern pool using a
+//! [`HashMap`].
+//! 2. If a string has not yet been interned:
+//! - The string is copied into the arena-backed pool;
+//! - A new [`Symbol`] is allocated adjacent to it in the arena holding
+//! a string slice referencing the arena-allocated string; and
+//! - The symbol is stored as the value in the [`HashMap`] for that key.
+//! 3. Otherwise, a reference to the existing [`Symbol`] is returned.
+//!
+//! Since the arena provides a stable location in memory,
+//! and all symbols are immutable,
+//! [`ArenaInterner`] is able to safely return any number of references to
+//! a single [`Symbol`],
+//! bound to the lifetime of the arena itself.
+//! Since the [`Symbol`] contains the string slice,
+//! it also acts as a [smart pointer] for the interned string itself,
+//! allowing [`Symbol`] to be used in any context where `&str` is
+//! expected.
+//! Dropping a [`Symbol`] does _not_ affect the underlying arena-allocated
+//! data.
+//!
+//! [smart pointer]: https://doc.rust-lang.org/book/ch15-00-smart-pointers.html
+//!
+//! Each symbol also has an associated integer index value
+//! (see [`Symbol::index`]),
+//! which provides a dense range of values suitable for use in vectors
+//! as an alternative to [`HashMap`] for mapping symbol data.
+//!
+//! Since a reference to the same [`Symbol`] is returned for each
+//! [`Interner::intern`] and [`Interner::intern_soft`] call,
+//! symbols can be compared by pointer in `O(1)` time.
+//! Symbols also implement [`Copy`],
+//! and will still compare equal to other symbols referencing the same
+//! interned value by comparing the underlying string slice pointers.
+//!
+//! This implementation was heavily motivated by [Rustc's own internment
+//! system][rustc-intern],
+//! but differs in significant ways:
+//!
+//! - This implementation stores string references in [`Symbol`] rather
+//! than relying on a global singleton [`Interner`];
+//! - Consequently, associates the lifetime of interned strings with that
+//! of the underlying arena rather than casting to `&'static`;
+//! - Retrieves symbol values by pointer reference without requiring use
+//! of [`Interner`] or a locking mechanism; and
+//! - Stores [`Symbol`] objects in the arena rather than within a vector
+//! indexed by [`SymbolIndex`].
+//!
+//!
+//! Name Mangling
+//! =============
+//! Interners do not perform [name mangling][].
+//! For future consideration,
+//! see [RFC 2603][rfc-2603] and the [Itanium ABI][itanium-abi].
+//!
+//! [name mangling]: https://en.wikipedia.org/wiki/Name_mangling
+//! [rfc-2603]: https://rust-lang.github.io/rfcs/2603-symbol-name-mangling-v2.html
+//! [itanium-abi]: http://refspecs.linuxbase.org/cxxabi-1.86.html#mangling
+//!
+//!
+//! Related Work and Further Reading
+//! ================================
+//! String interning is often tightly coupled with symbols (in the generic
+//! sense),
+//! sometimes called atoms.
+//! Symbols can often be either interned,
+//! and therefore compared for equivalency,
+//! or _uninterned_,
+//! which makes them unique even to symbols of the same name.
+//! Interning may also be done automatically by a language for performance.
+//! Languages listed below that allow for explicit interning may also
+//! perform automatic interning as well
+//! (for example, `'symbol` in Lisp and `lowercase_vars` as atoms in
+//! Erlang).
+//!
+//! | Language | Interned | Uninterned |
+//! | -------- | -------- | ---------- |
+//! | [Erlang][] | [`list_to_atom`][edt] | _(None)_ |
+//! | [GNU Emacs Lisp][] | [`intern`][es], [`intern-soft`][es] | [`make-symbol`][es], [`gensym`][es] |
+//! | [GNU Guile][] | [`string->symbol`][gs], [`gensym`][gs] | [`make-symbol`][gu] |
+//! | [JavaScript][] | [`Symbol.for`][js] | [`Symbol`][js] |
+//! | [Java][] | [`intern`][jvs] | _(None)_ |
+//! | [Lua][] | _(Automatic for string performance)_ | _(None)_ |
+//! | [MIT/GNU Scheme][] | [`intern`][ms], [`intern-soft`][ms], [`string->symbol`][ms] | [`string->uninterned-symbol`][ms], [`generate-uninterned-symbol`][ms] |
+//! | [PHP][] | _(Automatic for string [performance][pp])_ | _(None)_ |
+//! | [Python][] | [`sys.intern`][pys] | _(None)_ |
+//! | [R6RS Scheme][] | [`string->symbol`][r6s] | _(None)_ |
+//! | [Racket][] | [`string->symbol`][rs], [`string->unreadable-symbol`][rs] | [`string->uninterned-symbol`][rs], [`gensym`][rs] |
+//!
+//! [gnu guile]: https://www.gnu.org/software/guile/
+//! [gs]: https://www.gnu.org/software/guile/manual/html_node/Symbol-Primitives.html#Symbol-Primitives
+//! [gu]: https://www.gnu.org/software/guile/manual/html_node/Symbol-Uninterned.html#Symbol-Uninterned
+//! [gnu emacs lisp]: https://www.gnu.org/software/emacs/
+//! [es]: https://www.gnu.org/software/emacs/manual/html_node/elisp/Creating-Symbols.html
+//! [racket]: https://racket-lang.org/
+//! [rs]: https://docs.racket-lang.org/reference/symbols.html
+//! [r6rs scheme]: http://www.r6rs.org/
+//! [r6s]: http://www.r6rs.org/final/html/r6rs/r6rs-Z-H-14.html
+//! [mit/gnu scheme]: https://www.gnu.org/software/mit-scheme/
+//! [ms]: https://www.gnu.org/software/mit-scheme/documentation/mit-scheme-ref/Symbols.html
+//! [javascript]: https://developer.mozilla.org/en-US/docs/Web/JavaScript
+//! [js]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Symbol
+//! [java]: http://openjdk.java.net/
+//! [jvs]: https://cr.openjdk.java.net/~iris/se/12/latestSpec/api/java.base/java/lang/String.html#intern()
+//! [php]: https://www.php.net/
+//! [pp]: https://wiki.php.net/rfc/performanceimprovements
+//! [erlang]: https://erlang.org/
+//! [edt]: http://erlang.org/doc/reference_manual/data_types.html
+//! [lua]: https://www.lua.org/
+//! [python]: https://www.python.org/
+//! [pys]: https://docs.python.org/3/library/sys.html
+//!
+//! More information:
+//! - Wikipedia entry on [string interning][].
+//! - The [flyweight pattern][] in object-oriented programming is a type
+//! of interning.
+//! - [RFC 1845][rfc-1845] gives an example of string interning using
+//! `Rc<str>`.
+//! - Emacs directly exposes the intern pool at runtime as
+//! [`obarray`][es].
+//! - [`string-cache`][rust-string-cache] is a string interning system
+//! for Rust developed by Mozilla for Servo.
+//! - [`string-interner`][rust-string-interner] is another string
+//! interning library for Rust.
+//! - [Rustc interns strings as `Symbol`s][rustc-intern] using an
+//! [arena allocator][rustc-arena] and avoids `Rc` by representing
+//! symbols as integer values and converting them to strings using a
+//! global pool and unsafe rust to cast to a `static` slice.
+//! - Rustc identifies symbols by integer value encapsulated within a
+//! `Symbol`.
+//! - Rustc's [`newtype_index!` macro][rustc-nt] uses
+//! [`global::NonZeroProgSymSize`] so that [`Option`] uses no
+//! additional space (see [pull request `53315`][rustc-nt-pr]).
+//! - Differences between TAMER and Rustc's implementations are outlined
+//! above.
+//!
+//! [flyweight pattern]: https://en.wikipedia.org/wiki/Flyweight_pattern
+//! [rust-string-cache]: https://github.com/servo/string-cache
+//! [rust-string-interner]: https://github.com/robbepop/string-interner
+//! [rfc-1845]: https://rust-lang.github.io/rfcs/1845-shared-from-slice.html
+//! [rustc-intern]: https://doc.rust-lang.org/nightly/nightly-rustc/syntax/ast/struct.Name.html
+//! [rustc-arena]: https://doc.rust-lang.org/nightly/nightly-rustc/arena/index.html
+//! [rustc-nt]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_index/macro.newtype_index.html
+//! [rustc-nt-pr]: https://github.com/rust-lang/rust/pull/53315
+//!
+//! The hash function chosen for this module is [Fx Hash][fxhash].
+//!
+//! - Rustc previously used the [Fowler-Noll-Vo (FNV)][fnv] hash
+//! function,
+//! but [now uses Fx Hash][rustc-fx].
+//! This was extracted into the [`fxhash`][fxhash] crate,
+//! which is used by TAMER.
+//! - TAMER originally used FNV,
+//! but benchmarks showed that Fx Hash was more performant.
+//! - Benchmarks for other hash functions,
+//! including FNV,
+//! can be found at the [`hash-rs`][hash-rs] project.
+//!
+//! [fnv]: https://doc.servo.org/fnv/
+//! [rustc-fx]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_data_structures/fx/index.html
+//! [hash-rs]: https://github.com/Gankra/hash-rs
+
+use crate::global;
+use bumpalo::Bump;
+use fxhash::FxBuildHasher;
+use std::cell::{Cell, RefCell};
+use std::collections::HashMap;
+use std::convert::TryInto;
+use std::fmt;
+use std::hash::BuildHasher;
+use std::ops::Deref;
+
+/// Unique symbol identifier.
+///
+/// _Do not construct this value yourself;_
+/// use an [`Interner`].
+///
+/// This newtype helps to prevent other indexes from being used where a
+/// symbol index is expected.
+/// Note, however, that it provides no defense against mixing symbol indexes
+/// between multiple [`Interner`]s.
+///
+/// The index `0` is never valid because of [`global::NonZeroProgSymSize`],
+/// which allows us to have `Option<SymbolIndex>` at no space cost.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub struct SymbolIndex(global::NonZeroProgSymSize);
+
+impl From<SymbolIndex> for usize {
+ fn from(value: SymbolIndex) -> usize {
+ value.0.get().try_into().unwrap()
+ }
+}
+
+impl SymbolIndex {
+ /// Construct index from a non-zero `u32` value.
+ ///
+ /// Panics
+ /// ------
+ /// Will panic if `n == 0`.
+ pub fn from_u32(n: u32) -> SymbolIndex {
+ SymbolIndex(global::NonZeroProgSymSize::new(n).unwrap())
+ }
+
+ /// Construct index from an unchecked non-zero `u32` value.
+ ///
+ /// This does not verify that `n > 0` and so must only be used in
+ /// contexts where this invariant is guaranteed to hold.
+ /// Unlike [`from_u32`](SymbolIndex::from_u32),
+ /// this never panics.
+ unsafe fn from_u32_unchecked(n: u32) -> SymbolIndex {
+ SymbolIndex(global::NonZeroProgSymSize::new_unchecked(n))
+ }
+}
+
+/// Interned string.
+///
+/// A reference to this symbol is returned each time the same string is
+/// interned with the same [`Interner`];
+/// as such,
+/// symbols can be compared for equality by pointer;
+/// the underlying symbol id need not be used.
+///
+/// Each symbol is identified by a unique integer
+/// (see [`index`](Symbol::index)).
+/// The use of integers creates a more dense range of values than pointers,
+/// which allows callers to use a plain [`Vec`] as a map instead of
+/// something far more expensive like
+/// [`HashSet`](std::collections::HashSet);
+/// this is especially beneficial for portions of the system that make
+/// use of nearly all interned symbols,
+/// like the ASG.
+///
+/// The symbol also stores a string slice referencing the interned string
+/// itself,
+/// whose lifetime is that of the [`Interner`]'s underlying data store.
+/// Dereferencing the symbol will expose the underlying slice.
+#[derive(Copy, Clone, Debug)]
+pub struct Symbol<'i> {
+ index: SymbolIndex,
+ str: &'i str,
+}
+
+impl<'i> Symbol<'i> {
+ /// Construct a new interned value.
+ ///
+ /// _This must only be done by an [`Interner`]._
+ /// As such,
+ /// this function is not public.
+ ///
+ /// For test builds (when `cfg(test)`),
+ /// `new_dummy` is available to create symbols for tests.
+ #[inline]
+ fn new(index: SymbolIndex, str: &'i str) -> Symbol<'i> {
+ Self { index, str }
+ }
+
+ /// Retrieve unique symbol index.
+ ///
+ /// This is a densely-packed identifier that can be used as an index for
+ /// mapping.
+ /// See [`SymbolIndex`] for more information.
+ #[inline]
+ pub fn index(&self) -> SymbolIndex {
+ self.index
+ }
+
+ /// Construct a new interned value _for testing_.
+ ///
+ /// This is a public version of [`Symbol::new`] available for test
+ /// builds.
+ /// This separate name is meant to strongly imply that you should not be
+ /// doing this otherwise.
+ #[cfg(test)]
+ #[inline(always)]
+ pub fn new_dummy(index: SymbolIndex, str: &'i str) -> Symbol<'i> {
+ Self::new(index, str)
+ }
+}
+
+impl<'i> PartialEq for Symbol<'i> {
+ fn eq(&self, other: &Self) -> bool {
+ std::ptr::eq(self as *const _, other as *const _)
+ || std::ptr::eq(self.str.as_ptr(), other.str.as_ptr())
+ }
+}
+
+impl<'i> Eq for Symbol<'i> {}
+
+impl<'i> Deref for Symbol<'i> {
+ type Target = str;
+
+ /// Dereference to interned string slice.
+ ///
+ /// This allows for symbols to be used where strings are expected.
+ #[inline]
+ fn deref(&self) -> &str {
+ self.str
+ }
+}
+
+impl<'i> fmt::Display for Symbol<'i> {
+ /// Display name of underlying string.
+ ///
+ /// Since symbols contain pointers to their interned slices,
+ /// we effectively get this for free.
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ write!(f, "{}", self.str)
+ }
+}
+
+/// Create, store, compare, and retrieve [`Symbol`] values.
+///
+/// Interners accept string slices and produce values of type [`Symbol`].
+/// A reference to the same [`Symbol`] will always be returned for a given
+/// string,
+/// allowing symbols to be compared for equality cheaply by comparing
+/// pointers.
+/// Symbol locations in memory are fixed for the lifetime of the interner.
+///
+/// If you care whether a value has been interned yet or not,
+/// see [`intern_soft`][Interner::intern_soft`] and
+/// [`contains`](Interner::contains).
+///
+/// See the [module-level documentation](self) for an example.
+pub trait Interner<'i> {
+ /// Intern a string slice or return an existing [`Symbol`].
+ ///
+ /// If the provided string has already been interned,
+ /// then a reference to the existing [`Symbol`] will be returned.
+ /// Otherwise,
+ /// the string will be interned and a new [`Symbol`] created.
+ ///
+ /// The lifetime of the returned symbol is bound to the lifetime of the
+ /// underlying intern pool.
+ ///
+ /// To retrieve an existing symbol _without_ interning,
+ /// see [`intern_soft`](Interner::intern_soft).
+ fn intern(&'i self, value: &str) -> &'i Symbol<'i>;
+
+ /// Retrieve an existing intern for the string slice `s`.
+ ///
+ /// Unlike [`intern`](Interner::intern),
+ /// this will _not_ intern the string if it has not already been
+ /// interned.
+ fn intern_soft(&'i self, value: &str) -> Option<&'i Symbol<'i>>;
+
+ /// Determine whether the given value has already been interned.
+ fn contains(&self, value: &str) -> bool;
+
+ /// Number of interned strings.
+ ///
+ /// This count will increase each time a unique string is interned.
+ /// It does not increase when a string is already interned.
+ fn len(&self) -> usize;
+
+ /// Intern an assumed-UTF8 slice of bytes or return an existing
+ /// [`Symbol`].
+ ///
+ /// Safety
+ /// ======
+ /// This function is unsafe because it uses
+ /// [`std::str::from_utf8_unchecked`].
+ /// It is provided for convenience when interning from trusted binary
+ /// data
+ /// (such as [object files][]).
+ ///
+ /// [object files]: crate::obj
+ unsafe fn intern_utf8_unchecked(&'i self, value: &[u8]) -> &'i Symbol<'i> {
+ self.intern(std::str::from_utf8_unchecked(value))
+ }
+}
+
+/// An interner backed by an [arena](bumpalo).
+///
+/// Since interns exist until the interner itself is freed,
+/// an arena is a much more efficient and appropriate memory allocation
+/// strategy.
+/// This further provides a stable location in memory for symbol data.
+///
+/// For the recommended configuration,
+/// see [`DefaultInterner`].
+///
+/// See the [module-level documentation](self) for examples and more
+/// information on how to use this interner.
+pub struct ArenaInterner<'i, S>
+where
+ S: BuildHasher + Default,
+{
+ /// String and [`Symbol`] storage.
+ arena: Bump,
+
+ /// Next available symbol index.
+ ///
+ /// This must always be ≥1.
+ /// It is not defined as `NonZeroProgSymSize` because
+ /// `intern` enforces the invariant.
+ next_index: Cell<global::ProgSymSize>,
+
+ /// Map of interned strings to their respective [`Symbol`].
+ ///
+ /// Both strings and symbols are allocated within `arena`.
+ map: RefCell<HashMap<&'i str, &'i Symbol<'i>, S>>,
+}
+
+impl<'i, S> ArenaInterner<'i, S>
+where
+ S: BuildHasher + Default,
+{
+ /// Initialize a new interner with no initial capacity.
+ ///
+ /// Prefer [`with_capacity`](ArenaInterner::with_capacity) when possible.
+ #[inline]
+ pub fn new() -> Self {
+ Self::with_capacity(0)
+ }
+
+ /// Initialize a new interner with an initial capacity for the
+ /// underlying [`HashMap`].
+ ///
+ /// The given `capacity` has no affect on arena allocation.
+ /// Specifying initial capacity is important only for the map of strings
+ /// to symbols because it will reallocate and re-hash its contents
+ /// once capacity is exceeded.
+ /// See benchmarks.
+ ///
+ /// If reallocation is a major concern,
+ /// a [consistent hashing algorithm][consistent] could be considered,
+ /// but the implementation will still incur the cost of copying
+ /// the [`HashMap`]'s contents to a new location in memory.
+ ///
+ /// [consistent]: https://en.wikipedia.org/wiki/Consistent_hashing
+ #[inline]
+ pub fn with_capacity(capacity: usize) -> Self {
+ Self {
+ arena: Bump::new(),
+ next_index: Cell::new(1),
+ map: RefCell::new(HashMap::with_capacity_and_hasher(
+ capacity,
+ Default::default(),
+ )),
+ }
+ }
+}
+
+impl<'i, S> Interner<'i> for ArenaInterner<'i, S>
+where
+ S: BuildHasher + Default,
+{
+ fn intern(&'i self, value: &str) -> &'i Symbol<'i> {
+ let mut map = self.map.borrow_mut();
+
+ if let Some(sym) = map.get(value) {
+ return sym;
+ }
+
+ let next_index = self.next_index.get();
+
+ // Next_index should always be initialized to at least 1.
+ debug_assert!(next_index != 0);
+ let id = unsafe { SymbolIndex::from_u32_unchecked(next_index) };
+
+ // Copy string slice into the arena.
+ let clone: &'i str = unsafe {
+ &*(std::str::from_utf8_unchecked(
+ self.arena.alloc_slice_clone(value.as_bytes()),
+ ) as *const str)
+ };
+
+ // Symbols are also stored within the arena, adjacent to the
+ // string. This ensures that both have stable locations in memory.
+ let sym: &'i Symbol<'i> = self.arena.alloc(Symbol::new(id, &clone));
+
+ map.insert(clone, sym);
+ self.next_index.set(next_index + 1);
+
+ sym
+ }
+
+ #[inline]
+ fn intern_soft(&'i self, value: &str) -> Option<&'i Symbol<'i>> {
+ self.map.borrow().get(value).map(|sym| *sym)
+ }
+
+ #[inline]
+ fn contains(&self, value: &str) -> bool {
+ self.map.borrow().contains_key(value)
+ }
+
+ #[inline]
+ fn len(&self) -> usize {
+ self.map.borrow().len()
+ }
+}
+
+/// Interner using the [Fx Hash][fxhash] hashing function.
+///
+/// _This is currently the hash function used by [`DefaultInterner`]._
+///
+/// If denial of service is not a concern,
+/// then this will outperform the default
+/// [`DefaultHasher`](std::collections::hash_map::DefaultHasher)
+/// (which uses SipHash at the time of writing).
+///
+/// See intern benchmarks for a comparison.
+pub type FxArenaInterner<'i> = ArenaInterner<'i, FxBuildHasher>;
+
+/// Recommended [`Interner`] and configuration.
+///
+/// The choice of this default relies on the assumption that
+/// denial-of-service attacks against the hash function are not a
+/// concern.
+///
+/// For more information on the hashing algorithm,
+/// see [`FxArenaInterner`].
+pub type DefaultInterner<'i> = FxArenaInterner<'i>;
+
+#[cfg(test)]
+mod test {
+ use super::*;
+
+ mod symbol {
+ use super::*;
+
+ /// Option<Symbol> should have no space cost.
+ #[test]
+ fn symbol_index_option_no_cost() {
+ use std::mem::size_of;
+
+ assert_eq!(
+ size_of::<Option<Symbol>>(),
+ size_of::<Symbol>(),
+ "Option<Symbol> should be the same size as Symbol"
+ );
+ }
+
+ #[test]
+ fn self_compares_eq() {
+ let sym = Symbol::new(SymbolIndex::from_u32(1), "str");
+
+ assert_eq!(&sym, &sym);
+ }
+
+ #[test]
+ fn copy_compares_equal() {
+ let sym = Symbol::new(SymbolIndex::from_u32(1), "str");
+ let cpy = sym;
+
+ assert_eq!(sym, cpy);
+ }
+
+ // Integer values are for convenience, not identity. They cannot be
+ // used as a unique identifier across different interners.
+ #[test]
+ fn same_index_different_slices_compare_unequal() {
+ let a = Symbol::new(SymbolIndex::from_u32(1), "a");
+ let b = Symbol::new(SymbolIndex::from_u32(1), "b");
+
+ assert_ne!(a, b);
+ }
+
+ // As mentioned above, ids are _not_ the identity of the symbol. If
+ // two values point to the same location in memory, they are assumed
+ // to have come from the same interner, and should therefore have
+ // the same index this should never happen unless symbols are
+ // being created without the use of interners, which is unsupported.
+ //
+ // This test is a cautionary tale.
+ #[test]
+ fn different_index_same_slices_compare_equal() {
+ let slice = "str";
+
+ let a = Symbol::new(SymbolIndex::from_u32(1), slice);
+ let b = Symbol::new(SymbolIndex::from_u32(2), slice);
+
+ assert_eq!(a, b);
+ }
+
+ #[test]
+ fn cloned_symbols_compare_equal() {
+ let sym = Symbol::new(SymbolIndex::from_u32(1), "foo");
+
+ assert_eq!(sym, sym.clone());
+ }
+
+ // &Symbol can be used where string slices are expected (this won't
+ // compile otherwise).
+ #[test]
+ fn ref_can_be_used_as_string_slice() {
+ let slice = "str";
+ let sym_slice: &str = &Symbol::new(SymbolIndex::from_u32(1), slice);
+
+ assert_eq!(slice, sym_slice);
+ }
+
+ // For use when we can guarantee proper ids.
+ #[test]
+ fn can_create_index_unchecked() {
+ assert_eq!(SymbolIndex::from_u32(1), unsafe {
+ SymbolIndex::from_u32_unchecked(1)
+ });
+ }
+
+ #[test]
+ fn can_retrieve_symbol_index() {
+ let index = SymbolIndex::from_u32(1);
+
+ assert_eq!(index, Symbol::new(index, "").index());
+ }
+
+ #[test]
+ fn displays_as_interned_value() {
+ let sym = Symbol::new(SymbolIndex::from_u32(1), "foo");
+
+ assert_eq!(format!("{}", sym), sym.str);
+ }
+ }
+
+ mod interner {
+ use super::*;
+
+ type Sut<'i> = DefaultInterner<'i>;
+
+ #[test]
+ fn recognizes_equal_strings() {
+ let a = "foo";
+ let b = a.to_string();
+ let c = "bar";
+ let d = c.to_string();
+
+ let sut = Sut::new();
+
+ let (ia, ib, ic, id) =
+ (sut.intern(a), sut.intern(&b), sut.intern(c), sut.intern(&d));
+
+ assert_eq!(ia, ib);
+ assert_eq!(&ia, &ib);
+ assert_eq!(*ia, *ib);
+
+ assert_eq!(ic, id);
+ assert_eq!(&ic, &id);
+ assert_eq!(*ic, *id);
+
+ assert_ne!(ia, ic);
+ assert_ne!(&ia, &ic);
+ assert_ne!(*ia, *ic);
+ }
+
+ #[test]
+ fn symbol_id_increases_with_each_new_intern() {
+ let sut = Sut::new();
+
+ // Remember that identifiers begin at 1
+ assert_eq!(
+ SymbolIndex::from_u32(1),
+ sut.intern("foo").index(),
+ "First index should be 1"
+ );
+
+ assert_eq!(
+ SymbolIndex::from_u32(1),
+ sut.intern("foo").index(),
+ "Index should not increment for already-interned symbols"
+ );
+
+ assert_eq!(
+ SymbolIndex::from_u32(2),
+ sut.intern("bar").index(),
+ "Index should increment for new symbols"
+ );
+ }
+
+ #[test]
+ fn length_increases_with_each_new_intern() {
+ let sut = Sut::new();
+
+ assert_eq!(0, sut.len(), "invalid empty len");
+
+ sut.intern("foo");
+ assert_eq!(1, sut.len(), "increment len");
+
+ // duplicate
+ sut.intern("foo");
+ assert_eq!(1, sut.len(), "do not increment len on duplicates");
+
+ sut.intern("bar");
+ assert_eq!(2, sut.len(), "increment len (2)");
+ }
+
+ #[test]
+ fn can_check_wither_string_is_interned() {
+ let sut = Sut::new();
+
+ assert!(!sut.contains("foo"), "recognize missing value");
+ sut.intern("foo");
+ assert!(sut.contains("foo"), "recognize interned value");
+ }
+
+ #[test]
+ fn intern_soft() {
+ let sut = Sut::new();
+
+ assert_eq!(None, sut.intern_soft("foo"));
+
+ let foo = sut.intern("foo");
+ assert_eq!(Some(foo), sut.intern_soft("foo"));
+ }
+
+ #[test]
+ fn new_with_capacity() {
+ let n = 512;
+ let sut = Sut::with_capacity(n);
+
+ // note that this is not publicly available
+ assert!(sut.map.borrow().capacity() >= n);
+ }
+
+ #[test]
+ fn intern_utf8_unchecked() {
+ let sut = Sut::new();
+
+ let a = sut.intern("foo");
+ let b = unsafe { sut.intern_utf8_unchecked(b"foo") };
+
+ assert_eq!(a, b);
+ }
+ }
+}
diff --git a/tamer/src/test/mod.rs b/tamer/src/test/mod.rs
new file mode 100644
index 0000000..9b3b846
--- /dev/null
+++ b/tamer/src/test/mod.rs
@@ -0,0 +1,18 @@
+// Mocks, stubs, and other stuff for testing
+//
+// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+pub mod quick_xml;
diff --git a/tamer/src/test/quick_xml/mod.rs b/tamer/src/test/quick_xml/mod.rs
new file mode 100644
index 0000000..a9b6291
--- /dev/null
+++ b/tamer/src/test/quick_xml/mod.rs
@@ -0,0 +1,125 @@
+// quick_xml mocks
+//
+// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+use quick_xml::Result as XmlResult;
+use std::borrow::Cow;
+use std::cell::Cell;
+
+pub enum MockXmlEvent<'a> {
+ Start(MockBytesStart<'a>),
+ End(MockBytesEnd<'a>),
+ Empty(MockBytesStart<'a>),
+ #[allow(dead_code)]
+ Text(MockBytesText<'a>),
+}
+
+pub struct MockBytesStart<'a> {
+ name: &'a [u8],
+ attrs: Cell<Option<MockAttributes<'a>>>,
+}
+
+impl<'a> MockBytesStart<'a> {
+ pub fn new(name: &'a [u8], attrs: Option<MockAttributes<'a>>) -> Self {
+ Self {
+ name,
+ attrs: Cell::new(attrs),
+ }
+ }
+
+ pub fn name(&self) -> &[u8] {
+ self.name
+ }
+
+ pub fn attributes(&self) -> MockAttributes {
+ self.attrs.take().expect("missing mock attributes")
+ }
+}
+
+pub struct MockBytesEnd<'a> {
+ name: Cow<'a, [u8]>,
+}
+
+impl<'a> MockBytesEnd<'a> {
+ pub fn new(name: &'a [u8]) -> Self {
+ Self {
+ name: Cow::Borrowed(name),
+ }
+ }
+
+ pub fn name(&self) -> &[u8] {
+ &*self.name
+ }
+}
+
+pub struct MockBytesText<'a> {
+ #[allow(dead_code)]
+ content: Cow<'a, [u8]>,
+}
+
+impl<'a> MockBytesText<'a> {
+ pub fn new(content: &'a [u8]) -> Self {
+ Self {
+ content: Cow::Borrowed(content),
+ }
+ }
+}
+
+pub struct MockAttributes<'a> {
+ attrs: Vec<MockAttribute<'a>>,
+ with_checks: Option<bool>,
+}
+
+impl<'a> MockAttributes<'a> {
+ pub fn new(attrs: Vec<MockAttribute<'a>>) -> Self {
+ Self {
+ attrs,
+ with_checks: None,
+ }
+ }
+
+ pub fn with_checks(&mut self, val: bool) -> &mut Self {
+ self.with_checks = Some(val);
+ self
+ }
+}
+
+impl<'a> Iterator for MockAttributes<'a> {
+ type Item = XmlResult<MockAttribute<'a>>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ // We read output from Saxon, which will always be valid
+ if self.with_checks != Some(false) {
+ panic!("MockAttributes expected with_checks false")
+ }
+
+ self.attrs.pop().map(|attr| Ok(attr))
+ }
+}
+
+pub struct MockAttribute<'a> {
+ pub key: &'a [u8],
+ pub value: Cow<'a, [u8]>,
+}
+
+impl<'a> MockAttribute<'a> {
+ pub fn new(key: &'a [u8], value: &'a [u8]) -> Self {
+ Self {
+ key,
+ value: Cow::Borrowed(&value[..]),
+ }
+ }
+}