Mike Gerwitz

Activist for User Freedom

aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.gitlab-ci.yml4
-rw-r--r--Makefile.am1
-rw-r--r--RELEASES.md5
-rwxr-xr-xbootstrap1
-rw-r--r--build-aux/Makefile.am13
-rwxr-xr-xbuild-aux/list2typedef2
-rwxr-xr-xbuild-aux/test/test-list2typedef2
-rw-r--r--configure.ac4
-rw-r--r--core/alias.xml1
-rw-r--r--core/assert.xml1
-rw-r--r--core/base.xml19
-rw-r--r--core/cond.xml17
-rw-r--r--core/datetime.xml4
-rw-r--r--core/dummy.xml1
-rw-r--r--core/extern.xml1
-rw-r--r--core/map.xml2
-rw-r--r--core/numeric/boolean.xml5
-rw-r--r--core/numeric/convert.xml24
-rw-r--r--core/numeric/minmax.xml28
-rw-r--r--core/numeric/percent.xml4
-rw-r--r--core/numeric/round.xml18
-rw-r--r--core/tdat.xml3
-rw-r--r--core/test/core/numeric/round.xml2
-rw-r--r--core/test/core/vector/interpolate.xml4
-rw-r--r--core/test/spec.xml8
-rw-r--r--core/vector/arithmetic.xml2
-rw-r--r--core/vector/cmatch.xml35
-rw-r--r--core/vector/common.xml4
-rw-r--r--core/vector/convert.xml1
-rw-r--r--core/vector/count.xml5
-rw-r--r--core/vector/interpolate.xml6
-rw-r--r--core/vector/list.xml2
-rw-r--r--core/vector/matrix.xml2
-rw-r--r--core/vector/minmax.xml6
-rw-r--r--core/vector/table.xml12
-rw-r--r--progtest/Makefile.am3
-rw-r--r--src/current/include/preproc/template.xsl55
-rw-r--r--src/current/src/Makefile3
-rw-r--r--src/js/sha256.js594
-rw-r--r--tamer/Cargo.toml17
-rw-r--r--tamer/Makefile.am2
-rw-r--r--tamer/benches/xir.rs14
-rw-r--r--tamer/configure.ac2
-rw-r--r--tamer/src/asg/air.rs18
-rw-r--r--tamer/src/bin/tamec.rs323
-rw-r--r--tamer/src/diagnose.rs29
-rw-r--r--tamer/src/diagnose/panic.rs260
-rw-r--r--tamer/src/diagnose/report.rs32
-rw-r--r--tamer/src/diagnose/report/test.rs2
-rw-r--r--tamer/src/diagnose/report/test/integration.rs88
-rw-r--r--tamer/src/diagnose/resolve.rs1
-rw-r--r--tamer/src/fmt.rs123
-rw-r--r--tamer/src/ld/poc.rs86
-rw-r--r--tamer/src/ld/xmle/xir.rs2
-rw-r--r--tamer/src/ld/xmle/xir/test.rs2
-rw-r--r--tamer/src/lib.rs21
-rw-r--r--tamer/src/nir.rs442
-rw-r--r--tamer/src/nir/desugar.rs90
-rw-r--r--tamer/src/nir/desugar/interp.rs505
-rw-r--r--tamer/src/nir/desugar/interp/test.rs336
-rw-r--r--tamer/src/nir/desugar/test.rs36
-rw-r--r--tamer/src/nir/parse.rs1871
-rw-r--r--tamer/src/obj/xmlo/air.rs15
-rw-r--r--tamer/src/obj/xmlo/error.rs6
-rw-r--r--tamer/src/obj/xmlo/reader.rs73
-rw-r--r--tamer/src/obj/xmlo/reader/test.rs106
-rw-r--r--tamer/src/parse.rs308
-rw-r--r--tamer/src/parse/error.rs158
-rw-r--r--tamer/src/parse/lower.rs187
-rw-r--r--tamer/src/parse/parser.rs94
-rw-r--r--tamer/src/parse/state.rs150
-rw-r--r--tamer/src/parse/state/transition.rs132
-rw-r--r--tamer/src/parse/trace.rs173
-rw-r--r--tamer/src/parse/util.rs92
-rw-r--r--tamer/src/span.rs130
-rw-r--r--tamer/src/sym/prefill.rs343
-rw-r--r--tamer/src/sym/symbol.rs5
-rw-r--r--tamer/src/xir.rs113
-rw-r--r--tamer/src/xir/attr.rs21
-rw-r--r--tamer/src/xir/attr/parse.rs29
-rw-r--r--tamer/src/xir/flat.rs310
-rw-r--r--tamer/src/xir/flat/test.rs222
-rw-r--r--tamer/src/xir/fmt.rs27
-rw-r--r--tamer/src/xir/iter.rs2
-rw-r--r--tamer/src/xir/parse.rs8
-rw-r--r--tamer/src/xir/parse/attr.rs771
-rw-r--r--tamer/src/xir/parse/attr/test.rs632
-rw-r--r--tamer/src/xir/parse/ele.rs2139
-rw-r--r--tamer/src/xir/parse/ele/test.rs2126
-rw-r--r--tamer/src/xir/parse/error.rs291
-rw-r--r--tamer/src/xir/reader.rs40
-rw-r--r--tamer/src/xir/reader/test.rs2
-rw-r--r--tamer/src/xir/st.rs175
-rw-r--r--tamer/src/xir/tree.rs19
-rw-r--r--tamer/src/xir/tree/test.rs6
-rw-r--r--tamer/src/xir/writer.rs58
96 files changed, 12167 insertions, 2002 deletions
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index aa61760..c288711 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -24,7 +24,9 @@ build:
artifacts:
paths:
- doc/
- - tamer/target
+ - tamer/target/*/tamec
+ - tamer/target/*/tameld
+ - tamer/target/doc
expire_in: 30 min
build:doc:tpl:
diff --git a/Makefile.am b/Makefile.am
index e07d9ba..7ade14a 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -40,6 +40,7 @@ export DSLC_CLASSPATH
all: applies progtest
all-nodoc: applies progtest
+bin-local: applies
# the "applies" are hoxsl-generated stylesheets containing definitions to
# permit partial function application
diff --git a/RELEASES.md b/RELEASES.md
index 0dc044f..487bc21 100644
--- a/RELEASES.md
+++ b/RELEASES.md
@@ -13,10 +13,13 @@ TAME developers: Add new changes under a "NEXT" heading as part of the
commits that introduce the changes. To make a new release, run
`tools/mkrelease`, which will handle updating the heading for you.
+v19.1.0 (2022-09-22)
+====================
+Add gt/gte/lt/lte operators to if/unless
v19.0.3 (2022-04-01)
====================
-Add upper/lower state code abbrevation
+Add upper/lower state code abbreviation
v19.0.2 (2022-03-07)
diff --git a/bootstrap b/bootstrap
index 844c68f..d1f7fd4 100755
--- a/bootstrap
+++ b/bootstrap
@@ -34,5 +34,4 @@ test "${1:-}" = -n || git submodule update --init --recursive
&& { test -e hoxsl || test -L hoxsl || ln -s ../hoxsl; } \
&& autoreconf -fvi \
&& ./configure \
- && make all
diff --git a/build-aux/Makefile.am b/build-aux/Makefile.am
index af4c696..c5deb90 100644
--- a/build-aux/Makefile.am
+++ b/build-aux/Makefile.am
@@ -197,7 +197,7 @@ c1map: $(dest_c1map)
%.xml: %.dat rater/core/tdat.xmlo rater/tools/tdat2xml
rater/tools/tdat2xml $< > $@
-%.xml: %.typelist
+%.xml: %.typelist rater/tame/build-aux/list2typedef
rater/tame/build-aux/list2typedef $(*F) < $< > $@
%.csvo: %.csvm rater/tools/csvm2csv
@@ -275,11 +275,20 @@ clean:
| sed 's/\.csvm$$/\.xml/; s/\.dat$$/\.xml/' \
| xargs rm -fv
+# A target to be optionally overridden by `bootstrap.mk`.
+.PHONY: bootstrap-if-necessary
+bootstrap-if-necessary: FORCE
+
+# Targets intended to be run before the generation of `suppliers.mk`.
+# This should be used to re-bootstrap the system if necessary
+# (see `bootstrap-if-necessary` target).
+-include bootstrap.mk
+
# Generates a Makefile that will properly build all package
# dependencies. The redirect of ant to /dev/null is because it's still too
# noisy even with -q---the "BUILD SUCCESSFUL" line is confusing, considering
# it's merely a small part of a broader build.
-suppliers.mk: $(src_suppliersmk)
+suppliers.mk: $(src_suppliersmk) | bootstrap-if-necessary
$(ant) -q pkg-dep >/dev/null
find $(path_ui)/program/ -name '*.dep' | xargs cat $(path_ui)/program.dep | sort -u \
> $(path_ui)/package-dfns.dep
diff --git a/build-aux/list2typedef b/build-aux/list2typedef
index d19c368..350df8c 100755
--- a/build-aux/list2typedef
+++ b/build-aux/list2typedef
@@ -150,6 +150,8 @@ main()
cat <<EOF
<?xml version="1.0"?>
<package xmlns="http://www.lovullo.com/rater"
+ xmlns:c="http://www.lovullo.com/calc"
+ xmlns:t="http://www.lovullo.com/rater/apply-template"
title="$typedef Type">
<typedef name="$typedef" desc="$typedef">
<enum type="integer">
diff --git a/build-aux/test/test-list2typedef b/build-aux/test/test-list2typedef
index 21e4a59..9f967ad 100755
--- a/build-aux/test/test-list2typedef
+++ b/build-aux/test/test-list2typedef
@@ -38,6 +38,8 @@ Second'\''s @ @Line
declare -r expected='<?xml version="1.0"?>
<package xmlns="http://www.lovullo.com/rater"
+ xmlns:c="http://www.lovullo.com/calc"
+ xmlns:t="http://www.lovullo.com/rater/apply-template"
title="FooType Type">
<typedef name="FooType" desc="FooType">
<enum type="integer">
diff --git a/configure.ac b/configure.ac
index 2daff6f..92ff6cc 100644
--- a/configure.ac
+++ b/configure.ac
@@ -23,9 +23,7 @@ m4_if(ver, [], [m4_exit(1)])
AC_INIT([tame], [ver], [dev@lovullo.com])
AC_CONFIG_AUX_DIR([tools])
AM_INIT_AUTOMAKE([foreign])
-
-# target that should be be added to everything except doc/
-AM_EXTRA_RECURSIVE_TARGETS([all-nodoc])
+AM_EXTRA_RECURSIVE_TARGETS([bin all-nodoc])
# provide more granular version numbers based on the version string, using
# the format MAJOR.MINOR.REV[-SUFFIX], where SUFFIX can itself contain
diff --git a/core/alias.xml b/core/alias.xml
index a2bb1a6..d5e855d 100644
--- a/core/alias.xml
+++ b/core/alias.xml
@@ -19,6 +19,7 @@
-->
<package xmlns="http://www.lovullo.com/rater"
xmlns:c="http://www.lovullo.com/calc"
+ xmlns:t="http://www.lovullo.com/rater/apply-template"
core="true"
desc="Aliasing Values">
diff --git a/core/assert.xml b/core/assert.xml
index 1685339..6a5e1e7 100644
--- a/core/assert.xml
+++ b/core/assert.xml
@@ -18,6 +18,7 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
-->
<package xmlns="http://www.lovullo.com/rater"
+ xmlns:c="http://www.lovullo.com/calc"
xmlns:t="http://www.lovullo.com/rater/apply-template"
core="true"
desc="Assertions">
diff --git a/core/base.xml b/core/base.xml
index 676d23b..872773d 100644
--- a/core/base.xml
+++ b/core/base.xml
@@ -46,19 +46,6 @@
desc="Dummy value; this set is populated upon entering
each rate block" />
</const>
-
-
- The runtime is responsible for populating \ref{__DATE_YEAR__} with
- a proper value representing the current year.
-
- \todo{TAME is deterministic with this one exception; remove it and
- have users use the params from {\tt datetime} instead if they need this
- datum.}
-
- <const name="__DATE_YEAR__" magic="true"
- value="0" type="integer"
- desc="Current year"
- sym="\widehat{D^\gamma}" />
</section>
@@ -152,14 +139,12 @@
<classify as="always"
desc="Always true"
- yields="alwaysTrue"
- keep="true" />
+ yields="alwaysTrue" />
<classify as="never"
any="true"
desc="Never true"
- yields="neverTrue"
- keep="true" />
+ yields="neverTrue" />
</section>
diff --git a/core/cond.xml b/core/cond.xml
index 60a1de1..ca8adfb 100644
--- a/core/cond.xml
+++ b/core/cond.xml
@@ -19,6 +19,7 @@
-->
<package xmlns="http://www.lovullo.com/rater"
xmlns:c="http://www.lovullo.com/calc"
+ xmlns:t="http://www.lovullo.com/rater/apply-template"
core="true"
desc="Generic conditionals">
@@ -35,7 +36,7 @@
<c:case>
<c:when name="or_a">
<c:eq>
- <c:const value="0" type="integer" desc="Return B if A is 0" />
+ <c:const value="0" desc="Return B if A is 0" />
</c:eq>
</c:when>
@@ -65,7 +66,7 @@
<param-value name="@name@" />
</param>
- <c:const value="@value@" type="@type@" desc="@desc@">
+ <c:const value="@value@" desc="@desc@">
<!-- TODO: non-index option -->
<c:when name="@name@" index="@index@">
<c:eq>
@@ -86,14 +87,14 @@
<if name="@index@">
<c:when name="@name@" index="@index@">
<c:gt>
- <c:const value="0" type="integer" desc="Use override if greater than 0" />
+ <c:const value="0" desc="Use override if greater than 0" />
</c:gt>
</c:when>
</if>
<unless name="@index@">
<c:when name="@name@">
<c:gt>
- <c:const value="0" type="integer" desc="Use override if greater than 0" />
+ <c:const value="0" desc="Use override if greater than 0" />
</c:gt>
</c:when>
</unless>
@@ -122,11 +123,11 @@
<c:case>
<c:when name="@name@" index="@index@">
<c:eq>
- <c:const value="0" type="integer" desc="No value" />
+ <c:const value="0" desc="No value" />
</c:eq>
</c:when>
- <c:const value="@default@" type="integer" desc="Default value" />
+ <c:const value="@default@" desc="Default value" />
</c:case>
<c:otherwise>
@@ -149,13 +150,13 @@
</param>
<!-- simply returns a constant value for the class match -->
- <rate-each class="@class@" accumulate="none" generates="@generates@" index="k">
+ <rate-each class="@class@" generates="@generates@" index="k">
<c:product>
<if name="@value@">
<c:value-of name="@value@" />
</if>
<unless name="@value@">
- <c:const value="@const@" type="float" desc="@desc@" />
+ <c:const value="@const@" desc="@desc@" />
</unless>
<!-- if this is not provided, then the c:product will be optimized away -->
diff --git a/core/datetime.xml b/core/datetime.xml
index 0aae7cc..1282cc3 100644
--- a/core/datetime.xml
+++ b/core/datetime.xml
@@ -94,7 +94,7 @@
<c:value-of name="@default@" index="k" />
</if>
<unless name="@default@">
- <c:const value="0" type="integer" desc="Condition not met, but no default" />
+ <c:const value="0" desc="Condition not met, but no default" />
</unless>
</c:case>
</if>
@@ -105,7 +105,7 @@
<c:case>
<c:when name="@yearset@" index="k">
<c:gt>
- <c:const value="0" type="integer" desc="Only calculate difference if a value is available" />
+ <c:const value="0" desc="Only calculate difference if a value is available" />
</c:gt>
</c:when>
diff --git a/core/dummy.xml b/core/dummy.xml
index e752dd3..10af0de 100644
--- a/core/dummy.xml
+++ b/core/dummy.xml
@@ -19,6 +19,7 @@
-->
<package xmlns="http://www.lovullo.com/rater"
xmlns:c="http://www.lovullo.com/calc"
+ xmlns:t="http://www.lovullo.com/rater/apply-template"
core="true"
title="Dummy Values">
diff --git a/core/extern.xml b/core/extern.xml
index cc61132..d190d35 100644
--- a/core/extern.xml
+++ b/core/extern.xml
@@ -19,6 +19,7 @@
-->
<package xmlns="http://www.lovullo.com/rater"
xmlns:c="http://www.lovullo.com/calc"
+ xmlns:t="http://www.lovullo.com/rater/apply-template"
core="true"
desc="Extern Definition">
diff --git a/core/map.xml b/core/map.xml
index 18b84f0..7caea0c 100644
--- a/core/map.xml
+++ b/core/map.xml
@@ -68,7 +68,7 @@
<unless name="@default@" eq="">
<c:otherwise>
- <c:const value="@default@" type="integer" desc="No mapping" />
+ <c:const value="@default@" desc="No mapping" />
</c:otherwise>
</unless>
</c:cases>
diff --git a/core/numeric/boolean.xml b/core/numeric/boolean.xml
index 64f0b76..9c2fa4f 100644
--- a/core/numeric/boolean.xml
+++ b/core/numeric/boolean.xml
@@ -19,6 +19,7 @@
-->
<package xmlns="http://www.lovullo.com/rater"
xmlns:c="http://www.lovullo.com/calc"
+ xmlns:t="http://www.lovullo.com/rater/apply-template"
core="true"
desc="Numeric computations dealing with boolean algebra">
@@ -28,10 +29,10 @@
<function name="not" desc="Negates a boolean value" sym="\lnot">
<param name="not_value" type="boolean" desc="Boolean value to negate" />
- <c:const value="1" type="boolean" desc="Value of 1 if given value is zero">
+ <c:const value="1" desc="Value of 1 if given value is zero">
<c:when name="not_value">
<c:eq>
- <c:const value="0" type="boolean" desc="Value to assert against for returning 1" />
+ <c:const value="0" desc="Value to assert against for returning 1" />
</c:eq>
</c:when>
</c:const>
diff --git a/core/numeric/convert.xml b/core/numeric/convert.xml
index b0eb163..eb95718 100644
--- a/core/numeric/convert.xml
+++ b/core/numeric/convert.xml
@@ -33,30 +33,6 @@
<import package="round" export="true" />
- <!-- even more trivial, but again, cuts down on code -->
- <template name="_scalarToAccum_" desc="Simply accumulates a scalar">
- <param name="@scalar@" desc="Scalar to accumulate" />
- <param name="@accum@" desc="Accumulator to accumulate into" />
-
- <!-- this is useless, but required -->
- <param name="@yields@" desc="Value to yield into, since it's required (useless)">
- <text>__accum_</text>
- <param-value name="@accum@" />
- <text>_</text>
- <param-value name="@scalar@" />
- </param>
-
- <param name="@type@" desc="Accumulation method">
- <text>all</text>
- </param>
-
- <rate yields="@yields@">
- <accumulate into="@accum@" type="@type@" />
- <c:value-of name="@scalar@" />
- </rate>
- </template>
-
-
<!--
Map values falling within adjacent intervals
diff --git a/core/numeric/minmax.xml b/core/numeric/minmax.xml
index 94477b5..0fd36eb 100644
--- a/core/numeric/minmax.xml
+++ b/core/numeric/minmax.xml
@@ -79,7 +79,7 @@
<c:apply name="max" label="@label@">
<c:arg name="max1">
- <c:const value="0" type="integer" desc="Do not allow a value under 0" />
+ <c:const value="0" desc="Do not allow a value under 0" />
</c:arg>
<c:arg name="max2">
@@ -102,7 +102,7 @@
<param-value name="@generates@" />
</param>
- <rate-each class="@class@" accumulate="none" yields="@yields@" generates="@generates@" index="k">
+ <rate-each class="@class@" yields="@yields@" generates="@generates@" index="k">
<c:apply name="max">
<c:arg name="max1">
<c:value-of name="@a@" index="k" />
@@ -135,12 +135,11 @@
<c:arg name="min1">
<!-- deprecated -->
<if name="@value@">
- <c:const value="@value@" type="float" desc="@desc@" />
+ <c:const value="@value@" desc="@desc@" />
</if>
<unless name="@value@">
<c:value-of name="@name@"
index="@index@"
- type="float"
label="@desc@" />
</unless>
</c:arg>
@@ -162,7 +161,7 @@
<c:apply name="max" label="{@label@}, minimum of 1">
<c:arg name="max1">
- <c:const value="@min@" type="float" desc="Minimum value" />
+ <c:const value="@min@" desc="Minimum value" />
</c:arg>
<c:arg name="max2">
@@ -179,10 +178,10 @@
<param name="@desc@" desc="Description" />
<c:gte>
- <c:const value="@min@" type="float" desc="{@desc@}; minimum" />
+ <c:const value="@min@" desc="{@desc@}; minimum" />
</c:gte>
<c:lte>
- <c:const value="@max@" type="float" desc="{@desc@}; maximum" />
+ <c:const value="@max@" desc="{@desc@}; maximum" />
</c:lte>
</template>
@@ -202,7 +201,6 @@
<param name="@generates@" desc="Variable to generate into" />
<param name="@when@" desc="Conditional bump" />
<param name="@class@" desc="Class to match on" />
- <param name="@keep@" desc="Value of keep flag" />
<!-- alternative to @name@ -->
<param name="@const@" desc="Constant value, instead of named" />
@@ -211,7 +209,7 @@
<param name="@maxpercent@" desc="Maximum percent" />
- <rate yields="_{@generates@}" keep="@keep@">
+ <rate yields="_{@generates@}">
<c:sum of="@name@" index="k" generates="@generates@" desc="Bumped value">
<c:cases>
<!-- if a condition was provided, check it first -->
@@ -238,7 +236,7 @@
</c:when>
<!-- just return the value provided -->
- <c:const value="0" type="float" desc="Zero value" />
+ <c:const value="0" desc="Zero value" />
</c:case>
</if>
@@ -266,8 +264,8 @@
</unless>
<c:quotient label="Percent as real number">
- <c:const value="@percent@" type="integer" desc="Whole percent" />
- <c:const value="100" type="integer" desc="Divisor to convert percent to real number" />
+ <c:const value="@percent@" desc="Whole percent" />
+ <c:const value="100" desc="Divisor to convert percent to real number" />
</c:quotient>
</if>
@@ -279,7 +277,7 @@
</unless>
<if name="@const@">
- <c:const value="@const@" type="float" desc="Constant minimum value" />
+ <c:const value="@const@" desc="Constant minimum value" />
</if>
</c:value>
</c:values>
@@ -305,8 +303,8 @@
</unless>
<c:quotient label="Max percent as real number">
- <c:const value="@maxpercent@" type="integer" desc="Whole max percent" />
- <c:const value="100" type="integer" desc="Divisor to convert max percent to real number" />
+ <c:const value="@maxpercent@" desc="Whole max percent" />
+ <c:const value="100" desc="Divisor to convert max percent to real number" />
</c:quotient>
</c:product>
</c:value>
diff --git a/core/numeric/percent.xml b/core/numeric/percent.xml
index dce5d0b..0a6844b 100644
--- a/core/numeric/percent.xml
+++ b/core/numeric/percent.xml
@@ -132,11 +132,11 @@
<c:value-of name="@name@" index="@index@" />
</if>
<unless name="@name@">
- <c:const value="@value@" type="float" desc="@desc@" />
+ <c:const value="@value@" desc="@desc@" />
</unless>
</c:product>
- <c:const value="100" type="integer" desc="Convert to rational number" />
+ <c:const value="100" desc="Convert to rational number" />
</c:quotient>
</c:sum>
</template>
diff --git a/core/numeric/round.xml b/core/numeric/round.xml
index 48505c7..9e07794 100644
--- a/core/numeric/round.xml
+++ b/core/numeric/round.xml
@@ -42,8 +42,8 @@
<c:arg name="round_real_n">
<c:expt>
- <c:const value="10" type="integer" desc="Decimal base" />
- <c:const value="@precision@" type="integer" desc="Exponent" />
+ <c:const value="10" desc="Decimal base" />
+ <c:const value="@precision@" desc="Exponent" />
</c:expt>
</c:arg>
</c:apply>
@@ -111,7 +111,7 @@
desc="Exponential/step divisor">
<c:product>
<c:expt>
- <c:const value="10" type="integer"
+ <c:const value="10"
desc="Decimal base" />
<c:value-of name="@exp@" />
</c:expt>
@@ -196,7 +196,7 @@
<c:floor>
<c:sum>
<c:value-of name="roundval" />
- <c:const value="0.5" type="float" desc="Raises value in a manner that it can be properly rounded by a floor" />
+ <c:const value="0.5" desc="Raises value in a manner that it can be properly rounded by a floor" />
</c:sum>
</c:floor>
</function>
@@ -229,7 +229,7 @@
<c:apply name="round_real">
<c:arg name="round_real_n">
- <c:const value="100" type="integer" desc="Round to the nearest 100th" />
+ <c:const value="100" desc="Round to the nearest 100th" />
</c:arg>
<c:arg name="round_real_val">
@@ -248,15 +248,15 @@
<c:quotient>
<param-copy name="@values@" />
<c:expt>
- <c:const value="10" type="integer" desc="Decimal base" />
- <c:const value="@digits@" type="integer" desc="Number of digits" />
+ <c:const value="10" desc="Decimal base" />
+ <c:const value="@digits@" desc="Number of digits" />
</c:expt>
</c:quotient>
</c:ceil>
<c:expt>
- <c:const value="10" type="integer" desc="Decimal base" />
- <c:const value="@digits@" type="integer" desc="Number of digits" />
+ <c:const value="10" desc="Decimal base" />
+ <c:const value="@digits@" desc="Number of digits" />
</c:expt>
</c:product>
</template>
diff --git a/core/tdat.xml b/core/tdat.xml
index 2980771..b0de277 100644
--- a/core/tdat.xml
+++ b/core/tdat.xml
@@ -19,6 +19,7 @@
-->
<package xmlns="http://www.lovullo.com/rater"
xmlns:c="http://www.lovullo.com/calc"
+ xmlns:t="http://www.lovullo.com/rater/apply-template"
core="true"
desc="Territory data support (used in conjunction with tdat script)">
@@ -37,7 +38,7 @@
<rate-each class="@class@" yields="@yields@" generates="@generates@" index="k">
- <c:const value="@code@" type="integer" desc="Territory code" />
+ <c:const value="@code@" desc="Territory code" />
</rate-each>
</template>
</package>
diff --git a/core/test/core/numeric/round.xml b/core/test/core/numeric/round.xml
index b355c5e..d4c054c 100644
--- a/core/test/core/numeric/round.xml
+++ b/core/test/core/numeric/round.xml
@@ -50,7 +50,7 @@
<const name="VALUE_VEC" sym="V"
type="float"
desc="Vector of values">
- <item value="0" />
+ <item value="0" desc="Unused (see VALUE_VEC_INDEX)" />
<item value="5.5" desc="Same as VALUE_MID" />
</const>
<const name="VALUE_VEC_INDEX" sym="\nu"
diff --git a/core/test/core/vector/interpolate.xml b/core/test/core/vector/interpolate.xml
index ede3b33..e93a34f 100644
--- a/core/test/core/vector/interpolate.xml
+++ b/core/test/core/vector/interpolate.xml
@@ -171,7 +171,7 @@
step="INTERP_TABLE_STEP"
actual="#300">
<t:where-eq field="pred">
- <c:const value="31" type="float"
+ <c:const value="31"
desc="Test predicate value" />
</t:where-eq>
</t:interpolate-query-field>
@@ -221,7 +221,7 @@
step="INTERP_TABLE_STEP"
actual="#350">
<t:where-eq field="pred">
- <c:const value="31" type="float"
+ <c:const value="31"
desc="Test predicate value" />
</t:where-eq>
</t:interpolate-query-field>
diff --git a/core/test/spec.xml b/core/test/spec.xml
index 5555ffa..6dbe928 100644
--- a/core/test/spec.xml
+++ b/core/test/spec.xml
@@ -49,8 +49,6 @@
<package xmlns="http://www.lovullo.com/rater"
xmlns:t="http://www.lovullo.com/rater/apply-template"
xmlns:c="http://www.lovullo.com/calc"
- xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://www.lovullo.com/rater ../../rater.xsd"
core="true"
@@ -76,8 +74,7 @@
-->
<classify as="expect-ok"
yields="@result@"
- desc="All features conform to specifications"
- keep="true">
+ desc="All features conform to specifications">
<inline-template>
<for-each>
<sym-set name-prefix="expect-conform-"
@@ -154,8 +151,7 @@
<expand-sequence>
<expand-sequence>
<classify as="expect-conform-{@__prefix@}{@__uniq@}"
- desc="{@__full_name@} meets expectations"
- keep="true">
+ desc="{@__full_name@} meets expectations">
<inline-template>
<for-each>
<sym-set name-prefix="expect-that-{@__prefix@}"
diff --git a/core/vector/arithmetic.xml b/core/vector/arithmetic.xml
index 39d4cfb..9884f01 100644
--- a/core/vector/arithmetic.xml
+++ b/core/vector/arithmetic.xml
@@ -56,7 +56,7 @@
<text></text>
</param>
- <rate accumulate="none" yields="@yields@">
+ <rate yields="@yields@">
<c:sum of="@a@" index="k" generates="@into@" desc="@gendesc@" sym="@sym@">
<c:value-of name="@a@" index="k" />
<c:value-of name="@b@" index="k" />
diff --git a/core/vector/cmatch.xml b/core/vector/cmatch.xml
index f479a81..fcb57af 100644
--- a/core/vector/cmatch.xml
+++ b/core/vector/cmatch.xml
@@ -52,9 +52,6 @@
<template name="_cmatch-to-vector_" desc="Vectorizes a classification match">
<param name="@class@" desc="Classification match string" />
<param name="@generates@" desc="Variable to yield generates (will yield a vector)" />
- <param name="@keep@" desc="Rate block @keep">
- <text></text>
- </param>
<param name="@yields@" desc="Dummy variable to yield generates (useless, but required)">
<text>__</text>
@@ -74,7 +71,7 @@
<!-- this conversion is as simple as using a generator to yield the value
of _CMATCH_ for each index -->
- <rate class="@class@" accumulate="none" yields="@yields@" always="true" keep="@keep@">
+ <rate class="@class@" yields="@yields@">
<c:sum of="_CMATCH_" index="k" generates="@generates@" desc="@gendesc@" sym="@sym@">
<c:value-of name="_CMATCH_" index="k" />
</c:sum>
@@ -90,12 +87,8 @@
<text></text>
</param>
- <param name="@keep@" desc="Rate block @keep">
- <text></text>
- </param>
-
- <rate class="@class@" accumulate="none" yields="@yields@" sym="@sym@" keep="@keep@">
+ <rate class="@class@" yields="@yields@" sym="@sym@">
<!-- if any single one matches, then we want to yield a 1 -->
<c:apply name="maxreduce" maxreduce_set="_CMATCH_" />
</rate>
@@ -124,10 +117,6 @@
<param-value snake="true" name="@as@" />
</param>
- <param name="@keep@" desc="Whether to force compilation">
- <text></text>
- </param>
-
<param name="@sym@" desc="Optional yield symbol">
<text></text>
</param>
@@ -141,12 +130,10 @@
<t:cmatch-to-scalar class="--{@as@}-pre"
yields="__{@yields@}Scalar"
- sym="@sym@"
- keep="@keep@" />
+ sym="@sym@" />
<classify as="@as@" yields="@yields@"
desc="@desc@"
- keep="@keep@"
sym="@sym@">
<match on="__{@yields@}Scalar" />
</classify>
@@ -228,25 +215,11 @@
<template name="_match-{@cmp@}_" desc="Match value {@cmp@}">
<param name="@on@" desc="Value to assert" />
- <!-- pick one -->
- <param name="@const@" desc="Match against constant value" />
<param name="@value@" desc="Match against variable" />
- <if name="@const@">
- <warning>
- @const@ is deprecated; use @value@ with a #-prefix instead.
- </warning>
- </if>
-
<match on="@on@">
<dyn-node name="c:{@cmp@}">
- <if name="@const@">
- <c:const value="@const@" type="float" desc="Comparison" />
- </if>
-
- <unless name="@const@">
- <c:value-of name="@value@" />
- </unless>
+ <c:value-of name="@value@" />
</dyn-node>
</match>
</template>
diff --git a/core/vector/common.xml b/core/vector/common.xml
index 14c0cdd..8769f75 100644
--- a/core/vector/common.xml
+++ b/core/vector/common.xml
@@ -136,7 +136,7 @@
</c:gte>
</c:when>
- <c:const value="-1" type="integer" desc="Not found" />
+ <c:const value="-1" desc="Not found" />
</c:case>
@@ -199,7 +199,7 @@
<!-- generates a variable that can be recognized as an empty set (useful for
defaults to params that require sets) -->
<rate-each class="always" yields="__empty" generates="__emptySet" index="k">
- <c:const value="0" type="integer" desc="Nothing" />
+ <c:const value="0" desc="Nothing" />
</rate-each>
</package>
diff --git a/core/vector/convert.xml b/core/vector/convert.xml
index ea9996b..e1599ed 100644
--- a/core/vector/convert.xml
+++ b/core/vector/convert.xml
@@ -19,6 +19,7 @@
-->
<package xmlns="http://www.lovullo.com/rater"
xmlns:c="http://www.lovullo.com/calc"
+ xmlns:t="http://www.lovullo.com/rater/apply-template"
core="true"
desc="Convert vectors into other types">
diff --git a/core/vector/count.xml b/core/vector/count.xml
index 4cdc31e..b19e68b 100644
--- a/core/vector/count.xml
+++ b/core/vector/count.xml
@@ -19,6 +19,7 @@
-->
<package xmlns="http://www.lovullo.com/rater"
xmlns:c="http://www.lovullo.com/calc"
+ xmlns:t="http://www.lovullo.com/rater/apply-template"
core="true"
desc="Vector element counting">
@@ -30,7 +31,7 @@
<param name="count_set" type="integer" set="vector" desc="Vector to count" />
<c:sum of="count_set" index="k">
- <c:const value="1" type="integer" desc="Add 1 for each value in the set" />
+ <c:const value="1" desc="Add 1 for each value in the set" />
</c:sum>
</function>
@@ -51,7 +52,7 @@
</c:apply>
<!-- ensure the equation is not undefined if length = 0 -->
- <c:const value="1" type="integer" desc="Add 1 to ensure equation is always defined" />
+ <c:const value="1" desc="Add 1 to ensure equation is always defined" />
</c:sum>
</c:quotient>
</c:ceil>
diff --git a/core/vector/interpolate.xml b/core/vector/interpolate.xml
index c74f32d..5688b32 100644
--- a/core/vector/interpolate.xml
+++ b/core/vector/interpolate.xml
@@ -57,7 +57,7 @@
<c:value name="b" type="float" desc="Second set value">
<c:value-of name="orig_set">
<c:index>
- <c:const value="1" type="integer" desc="Second index" />
+ <c:const value="1" desc="Second index" />
</c:index>
</c:value-of>
</c:value>
@@ -110,14 +110,14 @@
<c:case>
<c:when name="step">
<c:eq>
- <c:const value="0" type="integer" desc="No step indicates identical values" />
+ <c:const value="0" desc="No step indicates identical values" />
</c:eq>
</c:when>
<!-- just return the first value; it's exact and no interpolation is necessary -->
<c:value-of name="set">
<c:index>
- <c:const value="0" type="integer" desc="First index" />
+ <c:const value="0" desc="First index" />
</c:index>
</c:value-of>
</c:case>
diff --git a/core/vector/list.xml b/core/vector/list.xml
index bd9fdc0..8d60feb 100644
--- a/core/vector/list.xml
+++ b/core/vector/list.xml
@@ -73,7 +73,7 @@
<c:case>
<c:when name="__valn">
<c:eq>
- <c:const value="0" type="integer" desc="When there are no more elements in the set" />
+ <c:const value="0" desc="When there are no more elements in the set" />
</c:eq>
</c:when>
diff --git a/core/vector/matrix.xml b/core/vector/matrix.xml
index a5d6cde..d27c356 100644
--- a/core/vector/matrix.xml
+++ b/core/vector/matrix.xml
@@ -80,7 +80,7 @@
<param-value name="@line@" />
</param>
- <rate-each class="@line@" accumulate="none" yields="@yields@" generates="@into@" index="k">
+ <rate-each class="@line@" yields="@yields@" generates="@into@" index="k">
<!-- take the dot product of the two vectors (each part of a larger matrix)
to get the rate for the associated class code -->
<c:product dot="true" label="Dot product between the class and rate vectors for each location will yield the respective rate per location">
diff --git a/core/vector/minmax.xml b/core/vector/minmax.xml
index 7c9fe22..1391eb7 100644
--- a/core/vector/minmax.xml
+++ b/core/vector/minmax.xml
@@ -189,7 +189,7 @@
</c:eq>
</c:when>
- <c:const value="0" type="integer" desc="No value" />
+ <c:const value="0" desc="No value" />
</c:case>
<!-- we have values; perform reduction -->
@@ -287,7 +287,7 @@
<c:arg name="_maxreduce_i">
<c:sum>
<c:value-of name="_maxreduce_i" />
- <c:const value="-1" type="integer" desc="Decrement index by 1" />
+ <c:const value="-1" desc="Decrement index by 1" />
</c:sum>
</c:arg>
</c:apply>
@@ -315,7 +315,7 @@
<param-value name="@generates@" />
</param>
- <rate-each class="@class@" accumulate="none" yields="@yields@" generates="@generates@" index="@index@">
+ <rate-each class="@class@" yields="@yields@" generates="@generates@" index="@index@">
<c:apply name="maxreduce">
<c:arg name="maxreduce_set">
<c:vector>
diff --git a/core/vector/table.xml b/core/vector/table.xml
index 9413b8e..44d0c0c 100644
--- a/core/vector/table.xml
+++ b/core/vector/table.xml
@@ -418,7 +418,7 @@
<c:vector label="Conditional for {@field@}">
<!-- the first element will represent the column (field) index -->
<unless name="@name@">
- <c:const value="@id@" type="integer" desc="Field index" />
+ <c:const value="@id@" desc="Field index" />
</unless>
<if name="@name@">
<c:value-of name="@name@" />
@@ -431,7 +431,7 @@
<!-- the third element will represent whether or not this field is sequential -->
<if name="@sequential@">
- <c:const value="@sequential@" type="boolean" desc="Whether data is sequential" />
+ <c:const value="@sequential@" desc="Whether data is sequential" />
</if>
<unless name="@sequential@">
<!-- if a field name was given, we can get the sequential information
@@ -538,7 +538,7 @@
<c:value-of name="i" />
</c:index>
<c:index>
- <c:const value="0" type="integer" desc="Field id" />
+ <c:const value="0" desc="Field id" />
</c:index>
</c:value-of>
</c:arg>
@@ -550,7 +550,7 @@
<c:value-of name="i" />
</c:index>
<c:index>
- <c:const value="1" type="integer" desc="Field value" />
+ <c:const value="1" desc="Field value" />
</c:index>
</c:value-of>
</c:arg>
@@ -562,7 +562,7 @@
<c:value-of name="i" />
</c:index>
<c:index>
- <c:const value="2" type="integer" desc="Sequential flag" />
+ <c:const value="2" desc="Sequential flag" />
</c:index>
</c:value-of>
</c:arg>
@@ -574,7 +574,7 @@
<c:value-of name="i" />
</c:index>
<c:index>
- <c:const value="3" type="integer" desc="Comparison operator" />
+ <c:const value="3" desc="Comparison operator" />
</c:index>
</c:value-of>
</c:arg>
diff --git a/progtest/Makefile.am b/progtest/Makefile.am
index ffb1b0e..30b58df 100644
--- a/progtest/Makefile.am
+++ b/progtest/Makefile.am
@@ -17,7 +17,7 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
-.PHONY: check test modindex dist browserify FORCE
+.PHONY: bin check test modindex dist browserify FORCE
namespaces=$(shell find src/ -type d)
nsindex=$(addsuffix /index.js, $(namespaces))
@@ -27,6 +27,7 @@ nsindex=$(addsuffix /index.js, $(namespaces))
# used be parent Makefile to prevent doc/ building `all'
all-nodoc: all
all: tame-progtest.js
+bin: all
modindex: $(nsindex)
%/index.js: FORCE
diff --git a/src/current/include/preproc/template.xsl b/src/current/include/preproc/template.xsl
index 6c9ce04..1e52506 100644
--- a/src/current/include/preproc/template.xsl
+++ b/src/current/include/preproc/template.xsl
@@ -823,6 +823,61 @@
</if>
</template>
+<template match="lv:*[@gt]" mode="preproc:apply-template-cmp" priority="5">
+ <param name="negate" select="false()" />
+ <param name="param-value" />
+ <param name="cmp-value" />
+
+ <if test="
+ ( ( $negate = 'true' ) and not( $param-value &gt; $cmp-value ) )
+ or ( ( $negate = 'false' ) and ( $param-value &gt; $cmp-value ) )
+ ">
+
+ <apply-templates select="*" mode="preproc:apply-template" />
+ </if>
+</template>
+
+<template match="lv:*[@gte]" mode="preproc:apply-template-cmp" priority="5">
+ <param name="negate" select="false()" />
+ <param name="param-value" />
+ <param name="cmp-value" />
+
+ <if test="
+ ( ( $negate = 'true' ) and not( $param-value &gt;= $cmp-value ) )
+ or ( ( $negate = 'false' ) and ( $param-value &gt;= $cmp-value ) )
+ ">
+
+ <apply-templates select="*" mode="preproc:apply-template" />
+ </if>
+</template>
+
+<template match="lv:*[@lt]" mode="preproc:apply-template-cmp" priority="5">
+ <param name="negate" select="false()" />
+ <param name="param-value" />
+ <param name="cmp-value" />
+
+ <if test="
+ ( ( $negate = 'true' ) and not( $param-value &lt; $cmp-value ) )
+ or ( ( $negate = 'false' ) and ( $param-value &lt; $cmp-value ) )
+ ">
+
+ <apply-templates select="*" mode="preproc:apply-template" />
+ </if>
+</template>
+
+<template match="lv:*[@lte]" mode="preproc:apply-template-cmp" priority="5">
+ <param name="negate" select="false()" />
+ <param name="param-value" />
+ <param name="cmp-value" />
+
+ <if test="
+ ( ( $negate = 'true' ) and not( $param-value &lt;= $cmp-value ) )
+ or ( ( $negate = 'false' ) and ( $param-value &lt;= $cmp-value ) )
+ ">
+
+ <apply-templates select="*" mode="preproc:apply-template" />
+ </if>
+</template>
<template mode="preproc:apply-template-cmp" priority="5"
match="lv:*[ @prefix ]">
diff --git a/src/current/src/Makefile b/src/current/src/Makefile
index 792810b..c1ba184 100644
--- a/src/current/src/Makefile
+++ b/src/current/src/Makefile
@@ -2,12 +2,13 @@
dslc_src := $(wildcard com/lovullo/dslc/*.java)
dslc_bin := $(dslc_src:.java=.class)
-.PHONY: all all-nodoc dslc clean check info pdf html
+.PHONY: all bin all-nodoc dslc clean check info pdf html
export CLASSPATH=$(DSLC_CLASSPATH)
all: dslc
dslc: dslc.jar
+bin: dslc
%.class: %.java
javac $<
diff --git a/src/js/sha256.js b/src/js/sha256.js
index 2581584..cc82d8c 100644
--- a/src/js/sha256.js
+++ b/src/js/sha256.js
@@ -1,6 +1,6 @@
/**
- * Source: https://raw.githubusercontent.com/geraintluff/sha256/gh-pages/sha256.js
- * This script is in the public domain.
+ * Source: https://github.com/emn178/js-sha256/blob/master/src/sha256.js
+ *
* This comment was added by Ryan Specialty Group.
*
* N.B.: THIS IMPLEMENTATION IS NOT INTENDED FOR SECURE CRYPTOGRAPHIC
@@ -14,99 +14,511 @@
* not been audited.
*/
-var sha256 = function sha256(ascii) {
- function rightRotate(value, amount) {
- return (value>>>amount) | (value<<(32 - amount));
- };
-
- var mathPow = Math.pow;
- var maxWord = mathPow(2, 32);
- var lengthProperty = 'length';
- var i, j; // Used as a counter across the whole file
- var result = '';
-
- var words = [];
- var asciiBitLength = ascii[lengthProperty]*8;
-
- //* caching results is optional - remove/add slash from front of this line to toggle
- // Initial hash value: first 32 bits of the fractional parts of the square roots of the first 8 primes
- // (we actually calculate the first 64, but extra values are just ignored)
- var hash = sha256.h = sha256.h || [];
- // Round constants: first 32 bits of the fractional parts of the cube roots of the first 64 primes
- var k = sha256.k = sha256.k || [];
- var primeCounter = k[lengthProperty];
- /*/
- var hash = [], k = [];
- var primeCounter = 0;
- //*/
-
- var isComposite = {};
- for (var candidate = 2; primeCounter < 64; candidate++) {
- if (!isComposite[candidate]) {
- for (i = 0; i < 313; i += candidate) {
- isComposite[i] = candidate;
- }
- hash[primeCounter] = (mathPow(candidate, .5)*maxWord)|0;
- k[primeCounter++] = (mathPow(candidate, 1/3)*maxWord)|0;
- }
+/**
+ * [js-sha256]{@link https://github.com/emn178/js-sha256}
+ *
+ * @version 0.9.0
+ * @author Chen, Yi-Cyuan [emn178@gmail.com]
+ * @copyright Chen, Yi-Cyuan 2014-2017
+ * @license MIT
+ */
+/*jslint bitwise: true */
+var sha256 = (function () {
+ 'use strict';
+
+ var ERROR = 'input is invalid type';
+ var WINDOW = typeof window === 'object';
+ var root = WINDOW ? window : {};
+ if (root.JS_SHA256_NO_WINDOW) {
+ WINDOW = false;
+ }
+ var WEB_WORKER = !WINDOW && typeof self === 'object';
+ var NODE_JS = !root.JS_SHA256_NO_NODE_JS && typeof process === 'object' && process.versions && process.versions.node;
+ if (NODE_JS) {
+ root = global;
+ } else if (WEB_WORKER) {
+ root = self;
}
-
- ascii += '\x80'; // Append '1' bit (plus zero padding)
- while (ascii[lengthProperty]%64 - 56) ascii += '\x00'; // More zero padding
- for (i = 0; i < ascii[lengthProperty]; i++) {
- j = ascii.charCodeAt(i);
- if (j>>8) return; // ASCII check: only accept characters in range 0-255
- words[i>>2] |= j << ((3 - i)%4)*8;
+ var COMMON_JS = !root.JS_SHA256_NO_COMMON_JS && typeof module === 'object' && module.exports;
+ var AMD = typeof define === 'function' && define.amd;
+ var ARRAY_BUFFER = !root.JS_SHA256_NO_ARRAY_BUFFER && typeof ArrayBuffer !== 'undefined';
+ var HEX_CHARS = '0123456789abcdef'.split('');
+ var EXTRA = [-2147483648, 8388608, 32768, 128];
+ var SHIFT = [24, 16, 8, 0];
+ var K = [
+ 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
+ 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
+ 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+ 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
+ 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
+ 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+ 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
+ 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+ ];
+ var OUTPUT_TYPES = ['hex', 'array', 'digest', 'arrayBuffer'];
+
+ var blocks = [];
+
+ if (root.JS_SHA256_NO_NODE_JS || !Array.isArray) {
+ Array.isArray = function (obj) {
+ return Object.prototype.toString.call(obj) === '[object Array]';
+ };
}
- words[words[lengthProperty]] = ((asciiBitLength/maxWord)|0);
- words[words[lengthProperty]] = (asciiBitLength)
-
- // process each chunk
- for (j = 0; j < words[lengthProperty];) {
- var w = words.slice(j, j += 16); // The message is expanded into 64 words as part of the iteration
- var oldHash = hash;
- // This is now the "working hash", often labelled as variables a...g
- // (we have to truncate as well, otherwise extra entries at the end accumulate
- hash = hash.slice(0, 8);
-
- for (i = 0; i < 64; i++) {
- var i2 = i + j;
- // Expand the message into 64 words
- // Used below if
- var w15 = w[i - 15], w2 = w[i - 2];
-
- // Iterate
- var a = hash[0], e = hash[4];
- var temp1 = hash[7]
- + (rightRotate(e, 6) ^ rightRotate(e, 11) ^ rightRotate(e, 25)) // S1
- + ((e&hash[5])^((~e)&hash[6])) // ch
- + k[i]
- // Expand the message schedule if needed
- + (w[i] = (i < 16) ? w[i] : (
- w[i - 16]
- + (rightRotate(w15, 7) ^ rightRotate(w15, 18) ^ (w15>>>3)) // s0
- + w[i - 7]
- + (rightRotate(w2, 17) ^ rightRotate(w2, 19) ^ (w2>>>10)) // s1
- )|0
- );
- // This is only used once, so *could* be moved below, but it only saves 4 bytes and makes things unreadble
- var temp2 = (rightRotate(a, 2) ^ rightRotate(a, 13) ^ rightRotate(a, 22)) // S0
- + ((a&hash[1])^(a&hash[2])^(hash[1]&hash[2])); // maj
-
- hash = [(temp1 + temp2)|0].concat(hash); // We don't bother trimming off the extra ones, they're harmless as long as we're truncating when we do the slice()
- hash[4] = (hash[4] + temp1)|0;
+
+ if (ARRAY_BUFFER && (root.JS_SHA256_NO_ARRAY_BUFFER_IS_VIEW || !ArrayBuffer.isView)) {
+ ArrayBuffer.isView = function (obj) {
+ return typeof obj === 'object' && obj.buffer && obj.buffer.constructor === ArrayBuffer;
+ };
+ }
+
+ var createOutputMethod = function (outputType, is224) {
+ return function (message) {
+ return new Sha256(is224, true).update(message)[outputType]();
+ };
+ };
+
+ var createMethod = function (is224) {
+ var method = createOutputMethod('hex', is224);
+ if (NODE_JS) {
+ method = nodeWrap(method, is224);
+ }
+ method.create = function () {
+ return new Sha256(is224);
+ };
+ method.update = function (message) {
+ return method.create().update(message);
+ };
+ for (var i = 0; i < OUTPUT_TYPES.length; ++i) {
+ var type = OUTPUT_TYPES[i];
+ method[type] = createOutputMethod(type, is224);
+ }
+ return method;
+ };
+
+ var nodeWrap = function (method, is224) {
+ var crypto = eval("require('crypto')");
+ var Buffer = eval("require('buffer').Buffer");
+ var algorithm = is224 ? 'sha224' : 'sha256';
+ var nodeMethod = function (message) {
+ if (typeof message === 'string') {
+ return crypto.createHash(algorithm).update(message, 'utf8').digest('hex');
+ } else {
+ if (message === null || message === undefined) {
+ throw new Error(ERROR);
+ } else if (message.constructor === ArrayBuffer) {
+ message = new Uint8Array(message);
+ }
}
-
- for (i = 0; i < 8; i++) {
- hash[i] = (hash[i] + oldHash[i])|0;
+ if (Array.isArray(message) || ArrayBuffer.isView(message) ||
+ message.constructor === Buffer) {
+ return crypto.createHash(algorithm).update(new Buffer(message)).digest('hex');
+ } else {
+ return method(message);
}
+ };
+ return nodeMethod;
+ };
+
+ var createHmacOutputMethod = function (outputType, is224) {
+ return function (key, message) {
+ return new HmacSha256(key, is224, true).update(message)[outputType]();
+ };
+ };
+
+ var createHmacMethod = function (is224) {
+ var method = createHmacOutputMethod('hex', is224);
+ method.create = function (key) {
+ return new HmacSha256(key, is224);
+ };
+ method.update = function (key, message) {
+ return method.create(key).update(message);
+ };
+ for (var i = 0; i < OUTPUT_TYPES.length; ++i) {
+ var type = OUTPUT_TYPES[i];
+ method[type] = createHmacOutputMethod(type, is224);
+ }
+ return method;
+ };
+
+ function Sha256(is224, sharedMemory) {
+ if (sharedMemory) {
+ blocks[0] = blocks[16] = blocks[1] = blocks[2] = blocks[3] =
+ blocks[4] = blocks[5] = blocks[6] = blocks[7] =
+ blocks[8] = blocks[9] = blocks[10] = blocks[11] =
+ blocks[12] = blocks[13] = blocks[14] = blocks[15] = 0;
+ this.blocks = blocks;
+ } else {
+ this.blocks = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
+ }
+
+ if (is224) {
+ this.h0 = 0xc1059ed8;
+ this.h1 = 0x367cd507;
+ this.h2 = 0x3070dd17;
+ this.h3 = 0xf70e5939;
+ this.h4 = 0xffc00b31;
+ this.h5 = 0x68581511;
+ this.h6 = 0x64f98fa7;
+ this.h7 = 0xbefa4fa4;
+ } else { // 256
+ this.h0 = 0x6a09e667;
+ this.h1 = 0xbb67ae85;
+ this.h2 = 0x3c6ef372;
+ this.h3 = 0xa54ff53a;
+ this.h4 = 0x510e527f;
+ this.h5 = 0x9b05688c;
+ this.h6 = 0x1f83d9ab;
+ this.h7 = 0x5be0cd19;
+ }
+
+ this.block = this.start = this.bytes = this.hBytes = 0;
+ this.finalized = this.hashed = false;
+ this.first = true;
+ this.is224 = is224;
}
-
- for (i = 0; i < 8; i++) {
- for (j = 3; j + 1; j--) {
- var b = (hash[i]>>(j*8))&255;
- result += ((b < 16) ? 0 : '') + b.toString(16);
+
+ Sha256.prototype.update = function (message) {
+ if (this.finalized) {
+ return;
+ }
+ var notString, type = typeof message;
+ if (type !== 'string') {
+ if (type === 'object') {
+ if (message === null) {
+ throw new Error(ERROR);
+ } else if (ARRAY_BUFFER && message.constructor === ArrayBuffer) {
+ message = new Uint8Array(message);
+ } else if (!Array.isArray(message)) {
+ if (!ARRAY_BUFFER || !ArrayBuffer.isView(message)) {
+ throw new Error(ERROR);
+ }
+ }
+ } else {
+ throw new Error(ERROR);
+ }
+ notString = true;
+ }
+ var code, index = 0, i, length = message.length, blocks = this.blocks;
+
+ while (index < length) {
+ if (this.hashed) {
+ this.hashed = false;
+ blocks[0] = this.block;
+ blocks[16] = blocks[1] = blocks[2] = blocks[3] =
+ blocks[4] = blocks[5] = blocks[6] = blocks[7] =
+ blocks[8] = blocks[9] = blocks[10] = blocks[11] =
+ blocks[12] = blocks[13] = blocks[14] = blocks[15] = 0;
+ }
+
+ if (notString) {
+ for (i = this.start; index < length && i < 64; ++index) {
+ blocks[i >> 2] |= message[index] << SHIFT[i++ & 3];
+ }
+ } else {
+ for (i = this.start; index < length && i < 64; ++index) {
+ code = message.charCodeAt(index);
+ if (code < 0x80) {
+ blocks[i >> 2] |= code << SHIFT[i++ & 3];
+ } else if (code < 0x800) {
+ blocks[i >> 2] |= (0xc0 | (code >> 6)) << SHIFT[i++ & 3];
+ blocks[i >> 2] |= (0x80 | (code & 0x3f)) << SHIFT[i++ & 3];
+ } else if (code < 0xd800 || code >= 0xe000) {
+ blocks[i >> 2] |= (0xe0 | (code >> 12)) << SHIFT[i++ & 3];
+ blocks[i >> 2] |= (0x80 | ((code >> 6) & 0x3f)) << SHIFT[i++ & 3];
+ blocks[i >> 2] |= (0x80 | (code & 0x3f)) << SHIFT[i++ & 3];
+ } else {
+ code = 0x10000 + (((code & 0x3ff) << 10) | (message.charCodeAt(++index) & 0x3ff));
+ blocks[i >> 2] |= (0xf0 | (code >> 18)) << SHIFT[i++ & 3];
+ blocks[i >> 2] |= (0x80 | ((code >> 12) & 0x3f)) << SHIFT[i++ & 3];
+ blocks[i >> 2] |= (0x80 | ((code >> 6) & 0x3f)) << SHIFT[i++ & 3];
+ blocks[i >> 2] |= (0x80 | (code & 0x3f)) << SHIFT[i++ & 3];
+ }
+ }
+ }
+
+ this.lastByteIndex = i;
+ this.bytes += i - this.start;
+ if (i >= 64) {
+ this.block = blocks[16];
+ this.start = i - 64;
+ this.hash();
+ this.hashed = true;
+ } else {
+ this.start = i;
}
+ }
+ if (this.bytes > 4294967295) {
+ this.hBytes += this.bytes / 4294967296 << 0;
+ this.bytes = this.bytes % 4294967296;
+ }
+ return this;
+ };
+
+ Sha256.prototype.finalize = function () {
+ if (this.finalized) {
+ return;
+ }
+ this.finalized = true;
+ var blocks = this.blocks, i = this.lastByteIndex;
+ blocks[16] = this.block;
+ blocks[i >> 2] |= EXTRA[i & 3];
+ this.block = blocks[16];
+ if (i >= 56) {
+ if (!this.hashed) {
+ this.hash();
+ }
+ blocks[0] = this.block;
+ blocks[16] = blocks[1] = blocks[2] = blocks[3] =
+ blocks[4] = blocks[5] = blocks[6] = blocks[7] =
+ blocks[8] = blocks[9] = blocks[10] = blocks[11] =
+ blocks[12] = blocks[13] = blocks[14] = blocks[15] = 0;
+ }
+ blocks[14] = this.hBytes << 3 | this.bytes >>> 29;
+ blocks[15] = this.bytes << 3;
+ this.hash();
+ };
+
+ Sha256.prototype.hash = function () {
+ var a = this.h0, b = this.h1, c = this.h2, d = this.h3, e = this.h4, f = this.h5, g = this.h6,
+ h = this.h7, blocks = this.blocks, j, s0, s1, maj, t1, t2, ch, ab, da, cd, bc;
+
+ for (j = 16; j < 64; ++j) {
+ // rightrotate
+ t1 = blocks[j - 15];
+ s0 = ((t1 >>> 7) | (t1 << 25)) ^ ((t1 >>> 18) | (t1 << 14)) ^ (t1 >>> 3);
+ t1 = blocks[j - 2];
+ s1 = ((t1 >>> 17) | (t1 << 15)) ^ ((t1 >>> 19) | (t1 << 13)) ^ (t1 >>> 10);
+ blocks[j] = blocks[j - 16] + s0 + blocks[j - 7] + s1 << 0;
+ }
+
+ bc = b & c;
+ for (j = 0; j < 64; j += 4) {
+ if (this.first) {
+ if (this.is224) {
+ ab = 300032;
+ t1 = blocks[0] - 1413257819;
+ h = t1 - 150054599 << 0;
+ d = t1 + 24177077 << 0;
+ } else {
+ ab = 704751109;
+ t1 = blocks[0] - 210244248;
+ h = t1 - 1521486534 << 0;
+ d = t1 + 143694565 << 0;
+ }
+ this.first = false;
+ } else {
+ s0 = ((a >>> 2) | (a << 30)) ^ ((a >>> 13) | (a << 19)) ^ ((a >>> 22) | (a << 10));
+ s1 = ((e >>> 6) | (e << 26)) ^ ((e >>> 11) | (e << 21)) ^ ((e >>> 25) | (e << 7));
+ ab = a & b;
+ maj = ab ^ (a & c) ^ bc;
+ ch = (e & f) ^ (~e & g);
+ t1 = h + s1 + ch + K[j] + blocks[j];
+ t2 = s0 + maj;
+ h = d + t1 << 0;
+ d = t1 + t2 << 0;
+ }
+ s0 = ((d >>> 2) | (d << 30)) ^ ((d >>> 13) | (d << 19)) ^ ((d >>> 22) | (d << 10));
+ s1 = ((h >>> 6) | (h << 26)) ^ ((h >>> 11) | (h << 21)) ^ ((h >>> 25) | (h << 7));
+ da = d & a;
+ maj = da ^ (d & b) ^ ab;
+ ch = (h & e) ^ (~h & f);
+ t1 = g + s1 + ch + K[j + 1] + blocks[j + 1];
+ t2 = s0 + maj;
+ g = c + t1 << 0;
+ c = t1 + t2 << 0;
+ s0 = ((c >>> 2) | (c << 30)) ^ ((c >>> 13) | (c << 19)) ^ ((c >>> 22) | (c << 10));
+ s1 = ((g >>> 6) | (g << 26)) ^ ((g >>> 11) | (g << 21)) ^ ((g >>> 25) | (g << 7));
+ cd = c & d;
+ maj = cd ^ (c & a) ^ da;
+ ch = (g & h) ^ (~g & e);
+ t1 = f + s1 + ch + K[j + 2] + blocks[j + 2];
+ t2 = s0 + maj;
+ f = b + t1 << 0;
+ b = t1 + t2 << 0;
+ s0 = ((b >>> 2) | (b << 30)) ^ ((b >>> 13) | (b << 19)) ^ ((b >>> 22) | (b << 10));
+ s1 = ((f >>> 6) | (f << 26)) ^ ((f >>> 11) | (f << 21)) ^ ((f >>> 25) | (f << 7));
+ bc = b & c;
+ maj = bc ^ (b & d) ^ cd;
+ ch = (f & g) ^ (~f & h);
+ t1 = e + s1 + ch + K[j + 3] + blocks[j + 3];
+ t2 = s0 + maj;
+ e = a + t1 << 0;
+ a = t1 + t2 << 0;
+ }
+
+ this.h0 = this.h0 + a << 0;
+ this.h1 = this.h1 + b << 0;
+ this.h2 = this.h2 + c << 0;
+ this.h3 = this.h3 + d << 0;
+ this.h4 = this.h4 + e << 0;
+ this.h5 = this.h5 + f << 0;
+ this.h6 = this.h6 + g << 0;
+ this.h7 = this.h7 + h << 0;
+ };
+
+ Sha256.prototype.hex = function () {
+ this.finalize();
+
+ var h0 = this.h0, h1 = this.h1, h2 = this.h2, h3 = this.h3, h4 = this.h4, h5 = this.h5,
+ h6 = this.h6, h7 = this.h7;
+
+ var hex = HEX_CHARS[(h0 >> 28) & 0x0F] + HEX_CHARS[(h0 >> 24) & 0x0F] +
+ HEX_CHARS[(h0 >> 20) & 0x0F] + HEX_CHARS[(h0 >> 16) & 0x0F] +
+ HEX_CHARS[(h0 >> 12) & 0x0F] + HEX_CHARS[(h0 >> 8) & 0x0F] +
+ HEX_CHARS[(h0 >> 4) & 0x0F] + HEX_CHARS[h0 & 0x0F] +
+ HEX_CHARS[(h1 >> 28) & 0x0F] + HEX_CHARS[(h1 >> 24) & 0x0F] +
+ HEX_CHARS[(h1 >> 20) & 0x0F] + HEX_CHARS[(h1 >> 16) & 0x0F] +
+ HEX_CHARS[(h1 >> 12) & 0x0F] + HEX_CHARS[(h1 >> 8) & 0x0F] +
+ HEX_CHARS[(h1 >> 4) & 0x0F] + HEX_CHARS[h1 & 0x0F] +
+ HEX_CHARS[(h2 >> 28) & 0x0F] + HEX_CHARS[(h2 >> 24) & 0x0F] +
+ HEX_CHARS[(h2 >> 20) & 0x0F] + HEX_CHARS[(h2 >> 16) & 0x0F] +
+ HEX_CHARS[(h2 >> 12) & 0x0F] + HEX_CHARS[(h2 >> 8) & 0x0F] +
+ HEX_CHARS[(h2 >> 4) & 0x0F] + HEX_CHARS[h2 & 0x0F] +
+ HEX_CHARS[(h3 >> 28) & 0x0F] + HEX_CHARS[(h3 >> 24) & 0x0F] +
+ HEX_CHARS[(h3 >> 20) & 0x0F] + HEX_CHARS[(h3 >> 16) & 0x0F] +
+ HEX_CHARS[(h3 >> 12) & 0x0F] + HEX_CHARS[(h3 >> 8) & 0x0F] +
+ HEX_CHARS[(h3 >> 4) & 0x0F] + HEX_CHARS[h3 & 0x0F] +
+ HEX_CHARS[(h4 >> 28) & 0x0F] + HEX_CHARS[(h4 >> 24) & 0x0F] +
+ HEX_CHARS[(h4 >> 20) & 0x0F] + HEX_CHARS[(h4 >> 16) & 0x0F] +
+ HEX_CHARS[(h4 >> 12) & 0x0F] + HEX_CHARS[(h4 >> 8) & 0x0F] +
+ HEX_CHARS[(h4 >> 4) & 0x0F] + HEX_CHARS[h4 & 0x0F] +
+ HEX_CHARS[(h5 >> 28) & 0x0F] + HEX_CHARS[(h5 >> 24) & 0x0F] +
+ HEX_CHARS[(h5 >> 20) & 0x0F] + HEX_CHARS[(h5 >> 16) & 0x0F] +
+ HEX_CHARS[(h5 >> 12) & 0x0F] + HEX_CHARS[(h5 >> 8) & 0x0F] +
+ HEX_CHARS[(h5 >> 4) & 0x0F] + HEX_CHARS[h5 & 0x0F] +
+ HEX_CHARS[(h6 >> 28) & 0x0F] + HEX_CHARS[(h6 >> 24) & 0x0F] +
+ HEX_CHARS[(h6 >> 20) & 0x0F] + HEX_CHARS[(h6 >> 16) & 0x0F] +
+ HEX_CHARS[(h6 >> 12) & 0x0F] + HEX_CHARS[(h6 >> 8) & 0x0F] +
+ HEX_CHARS[(h6 >> 4) & 0x0F] + HEX_CHARS[h6 & 0x0F];
+ if (!this.is224) {
+ hex += HEX_CHARS[(h7 >> 28) & 0x0F] + HEX_CHARS[(h7 >> 24) & 0x0F] +
+ HEX_CHARS[(h7 >> 20) & 0x0F] + HEX_CHARS[(h7 >> 16) & 0x0F] +
+ HEX_CHARS[(h7 >> 12) & 0x0F] + HEX_CHARS[(h7 >> 8) & 0x0F] +
+ HEX_CHARS[(h7 >> 4) & 0x0F] + HEX_CHARS[h7 & 0x0F];
+ }
+ return hex;
+ };
+
+ Sha256.prototype.toString = Sha256.prototype.hex;
+
+ Sha256.prototype.digest = function () {
+ this.finalize();
+
+ var h0 = this.h0, h1 = this.h1, h2 = this.h2, h3 = this.h3, h4 = this.h4, h5 = this.h5,
+ h6 = this.h6, h7 = this.h7;
+
+ var arr = [
+ (h0 >> 24) & 0xFF, (h0 >> 16) & 0xFF, (h0 >> 8) & 0xFF, h0 & 0xFF,
+ (h1 >> 24) & 0xFF, (h1 >> 16) & 0xFF, (h1 >> 8) & 0xFF, h1 & 0xFF,
+ (h2 >> 24) & 0xFF, (h2 >> 16) & 0xFF, (h2 >> 8) & 0xFF, h2 & 0xFF,
+ (h3 >> 24) & 0xFF, (h3 >> 16) & 0xFF, (h3 >> 8) & 0xFF, h3 & 0xFF,
+ (h4 >> 24) & 0xFF, (h4 >> 16) & 0xFF, (h4 >> 8) & 0xFF, h4 & 0xFF,
+ (h5 >> 24) & 0xFF, (h5 >> 16) & 0xFF, (h5 >> 8) & 0xFF, h5 & 0xFF,
+ (h6 >> 24) & 0xFF, (h6 >> 16) & 0xFF, (h6 >> 8) & 0xFF, h6 & 0xFF
+ ];
+ if (!this.is224) {
+ arr.push((h7 >> 24) & 0xFF, (h7 >> 16) & 0xFF, (h7 >> 8) & 0xFF, h7 & 0xFF);
+ }
+ return arr;
+ };
+
+ Sha256.prototype.array = Sha256.prototype.digest;
+
+ Sha256.prototype.arrayBuffer = function () {
+ this.finalize();
+
+ var buffer = new ArrayBuffer(this.is224 ? 28 : 32);
+ var dataView = new DataView(buffer);
+ dataView.setUint32(0, this.h0);
+ dataView.setUint32(4, this.h1);
+ dataView.setUint32(8, this.h2);
+ dataView.setUint32(12, this.h3);
+ dataView.setUint32(16, this.h4);
+ dataView.setUint32(20, this.h5);
+ dataView.setUint32(24, this.h6);
+ if (!this.is224) {
+ dataView.setUint32(28, this.h7);
+ }
+ return buffer;
+ };
+
+ function HmacSha256(key, is224, sharedMemory) {
+ var i, type = typeof key;
+ if (type === 'string') {
+ var bytes = [], length = key.length, index = 0, code;
+ for (i = 0; i < length; ++i) {
+ code = key.charCodeAt(i);
+ if (code < 0x80) {
+ bytes[index++] = code;
+ } else if (code < 0x800) {
+ bytes[index++] = (0xc0 | (code >> 6));
+ bytes[index++] = (0x80 | (code & 0x3f));
+ } else if (code < 0xd800 || code >= 0xe000) {
+ bytes[index++] = (0xe0 | (code >> 12));
+ bytes[index++] = (0x80 | ((code >> 6) & 0x3f));
+ bytes[index++] = (0x80 | (code & 0x3f));
+ } else {
+ code = 0x10000 + (((code & 0x3ff) << 10) | (key.charCodeAt(++i) & 0x3ff));
+ bytes[index++] = (0xf0 | (code >> 18));
+ bytes[index++] = (0x80 | ((code >> 12) & 0x3f));
+ bytes[index++] = (0x80 | ((code >> 6) & 0x3f));
+ bytes[index++] = (0x80 | (code & 0x3f));
+ }
+ }
+ key = bytes;
+ } else {
+ if (type === 'object') {
+ if (key === null) {
+ throw new Error(ERROR);
+ } else if (ARRAY_BUFFER && key.constructor === ArrayBuffer) {
+ key = new Uint8Array(key);
+ } else if (!Array.isArray(key)) {
+ if (!ARRAY_BUFFER || !ArrayBuffer.isView(key)) {
+ throw new Error(ERROR);
+ }
+ }
+ } else {
+ throw new Error(ERROR);
+ }
+ }
+
+ if (key.length > 64) {
+ key = (new Sha256(is224, true)).update(key).array();
+ }
+
+ var oKeyPad = [], iKeyPad = [];
+ for (i = 0; i < 64; ++i) {
+ var b = key[i] || 0;
+ oKeyPad[i] = 0x5c ^ b;
+ iKeyPad[i] = 0x36 ^ b;
+ }
+
+ Sha256.call(this, is224, sharedMemory);
+
+ this.update(iKeyPad);
+ this.oKeyPad = oKeyPad;
+ this.inner = true;
+ this.sharedMemory = sharedMemory;
}
- return result;
-};
+ HmacSha256.prototype = new Sha256();
+
+ HmacSha256.prototype.finalize = function () {
+ Sha256.prototype.finalize.call(this);
+ if (this.inner) {
+ this.inner = false;
+ var innerHash = this.array();
+ Sha256.call(this, this.is224, this.sharedMemory);
+ this.update(this.oKeyPad);
+ this.update(innerHash);
+ Sha256.prototype.finalize.call(this);
+ }
+ };
+
+ var exports = createMethod();
+ exports.sha256 = exports;
+ exports.sha224 = createMethod(true);
+ exports.sha256.hmac = createHmacMethod();
+ exports.sha224.hmac = createHmacMethod(true);
+
+ return exports.sha256;
+ })();
diff --git a/tamer/Cargo.toml b/tamer/Cargo.toml
index bebb7fa..52fc815 100644
--- a/tamer/Cargo.toml
+++ b/tamer/Cargo.toml
@@ -25,15 +25,15 @@ lto = true
[dependencies]
arrayvec = ">= 0.7.1"
bumpalo = ">= 2.6.0"
+exitcode = "1.1.2"
fxhash = ">= 0.2.1"
-petgraph = "0.6.0"
-quick-xml = ">= 0.23.0-alpha3"
getopts = "0.2"
-exitcode = "1.1.2"
-petgraph-graphml = "3.0.0"
-static_assertions = ">= 1.1.0"
memchr = ">= 2.3.4" # quick-xml expects =2.3.4 at the time
paste = ">= 1.0.5"
+petgraph = "0.6.0"
+petgraph-graphml = "3.0.0"
+quick-xml = ">= 0.23.0-alpha3"
+static_assertions = ">= 1.1.0"
unicode-width = "0.1.5"
# Feature flags can be specified using `./configure FEATURES=foo,bar,baz`.
@@ -46,3 +46,10 @@ unicode-width = "0.1.5"
# and the flag removed.
[features]
+# Cause `Parser` to emit a verbose, human-readable trace to stderr for every
+# token. This is not intended to be machine-readable, so please do not
+# parse it.
+#
+# This is enabled automatically for the `test` profile.
+parser-trace-stderr = []
+
diff --git a/tamer/Makefile.am b/tamer/Makefile.am
index 75cf823..acab8d5 100644
--- a/tamer/Makefile.am
+++ b/tamer/Makefile.am
@@ -47,7 +47,7 @@ html-am:
# note that 'cargo check' is something else; see 'cargo --help'
test: check
check-am: check-fmt
- @CARGO@ +@RUST_TC@ @CARGO_FLAGS@ test @FEATURES@
+ @CARGO@ +@RUST_TC@ @CARGO_FLAGS@ test --quiet @FEATURES@
check-fmt:
@CARGO@ +@RUST_TC@ @CARGO_FLAGS@ fmt -- --check
diff --git a/tamer/benches/xir.rs b/tamer/benches/xir.rs
index 2010ac1..2e62771 100644
--- a/tamer/benches/xir.rs
+++ b/tamer/benches/xir.rs
@@ -117,20 +117,6 @@ mod name {
}
}
-mod ws {
- use super::*;
- use tamer::xir::Whitespace;
-
- #[bench]
- fn whitespace_1000(bench: &mut Bencher) {
- bench.iter(|| {
- (0..1000)
- .map(|_| Whitespace::try_from(" \t "))
- .for_each(drop);
- });
- }
-}
-
mod writer {
use super::*;
use quick_xml::{
diff --git a/tamer/configure.ac b/tamer/configure.ac
index bcde316..094c2ce 100644
--- a/tamer/configure.ac
+++ b/tamer/configure.ac
@@ -56,7 +56,7 @@ test -n "$CARGO" || AC_MSG_ERROR([cargo not found])
AC_SUBST([CARGO_FLAGS], "--frozen --offline")
# This is a nightly version at the time of writing
-rustc_ver_req=1.62
+rustc_ver_req=1.65
AC_CHECK_PROGS(RUSTC, [rustc])
AC_MSG_CHECKING([rustc $RUST_TC version >= $rustc_ver_req])
diff --git a/tamer/src/asg/air.rs b/tamer/src/asg/air.rs
index e70459a..5df3dcd 100644
--- a/tamer/src/asg/air.rs
+++ b/tamer/src/asg/air.rs
@@ -73,6 +73,10 @@ pub enum AirToken {
}
impl Token for AirToken {
+ fn ir_name() -> &'static str {
+ "AIR"
+ }
+
fn span(&self) -> crate::span::Span {
// TODO: This can be provided once the xmlo files store source
// locations for symbols.
@@ -162,7 +166,7 @@ impl ParseState for AirAggregate {
}
}
- fn is_accepting(&self) -> bool {
+ fn is_accepting(&self, _: &Self::Context) -> bool {
*self == Self::Empty
}
}
@@ -209,7 +213,7 @@ mod test {
assert_eq!(Some(Ok(Parsed::Incomplete)), sut.next());
- let asg = sut.finalize().unwrap();
+ let asg = sut.finalize().unwrap().into_context();
let ident_node =
asg.lookup(sym).expect("identifier was not added to graph");
@@ -250,7 +254,7 @@ mod test {
assert_eq!(Some(Ok(Parsed::Incomplete)), sut.next());
- let asg = sut.finalize().unwrap();
+ let asg = sut.finalize().unwrap().into_context();
let ident_node =
asg.lookup(sym).expect("identifier was not added to graph");
@@ -288,7 +292,7 @@ mod test {
assert_eq!(Some(Ok(Parsed::Incomplete)), sut.next());
- let asg = sut.finalize().unwrap();
+ let asg = sut.finalize().unwrap().into_context();
let ident_node = asg
.lookup(ident)
@@ -321,7 +325,7 @@ mod test {
assert_eq!(Some(Ok(Parsed::Incomplete)), sut.next()); // IdentDecl
assert_eq!(Some(Ok(Parsed::Incomplete)), sut.next()); // IdentFragment
- let asg = sut.finalize().unwrap();
+ let asg = sut.finalize().unwrap().into_context();
let ident_node =
asg.lookup(sym).expect("identifier was not added to graph");
@@ -358,7 +362,7 @@ mod test {
assert_eq!(Some(Ok(Parsed::Incomplete)), sut.next());
- let asg = sut.finalize().unwrap();
+ let asg = sut.finalize().unwrap().into_context();
let ident_node = asg
.lookup(sym)
@@ -397,7 +401,7 @@ mod test {
assert_eq!(Some(Ok(Parsed::Incomplete)), sut.next()); // IdentDecl
assert_eq!(Some(Ok(Parsed::Incomplete)), sut.next()); // IdentRoot
- let asg = sut.finalize().unwrap();
+ let asg = sut.finalize().unwrap().into_context();
let ident_node = asg
.lookup(sym)
diff --git a/tamer/src/bin/tamec.rs b/tamer/src/bin/tamec.rs
index 336ed7c..ab34506 100644
--- a/tamer/src/bin/tamec.rs
+++ b/tamer/src/bin/tamec.rs
@@ -28,17 +28,26 @@ use getopts::{Fail, Options};
use std::{
env,
error::Error,
- ffi::OsStr,
- fmt::{self, Display},
- fs, io,
+ fmt::{self, Display, Write},
+ fs::{self, File},
+ io::{self, BufReader, BufWriter},
path::Path,
};
use tamer::{
diagnose::{
AnnotatedSpan, Diagnostic, FsSpanResolver, Reporter, VisualReporter,
},
- parse::{ParseError, Parsed, UnknownToken},
- xir,
+ nir::{DesugarNir, DesugarNirError, SugaredNir, XirfToNir, XirfToNirError},
+ parse::{
+ Lower, ParseError, Parsed, ParsedObject, ParsedResult, UnknownToken,
+ },
+ xir::{
+ self,
+ flat::{RefinedText, XirToXirf, XirToXirfError, XirfToken},
+ reader::XmlXirReader,
+ writer::XmlWriter,
+ DefaultEscaper, Error as XirError, Token as XirToken,
+ },
};
/// Types of commands
@@ -47,69 +56,130 @@ enum Command {
Usage,
}
+/// Create a [`XmlXirReader`] for a source file.
+///
+/// The provided escaper must be shared between all readers and writers in
+/// order to benefit from its caching.
+fn src_reader<'a>(
+ input: &'a String,
+ escaper: &'a DefaultEscaper,
+) -> Result<XmlXirReader<'a, BufReader<File>>, UnrecoverableError> {
+ use tamer::fs::{File, PathFile};
+
+ let source = Path::new(input);
+
+ let PathFile(_, file, ctx): PathFile<BufReader<fs::File>> =
+ PathFile::open(source)?;
+
+ Ok(XmlXirReader::new(file, escaper, ctx))
+}
+
+/// Write each parsed token to the provided buffer.
+///
+/// This is intended to be a temporary function that exists during a
+/// transition period between the XSLT-based TAME and TAMER.
+/// Writing XIR proves that the source file is being successfully parsed and
+/// helps to evaluate system performance.
+fn copy_xml_to<'e, W: io::Write + 'e>(
+ mut fout: W,
+ escaper: &'e DefaultEscaper,
+) -> impl FnMut(&ParsedResult<ParsedObject<XirToken, XirError>>) + 'e {
+ let mut xmlwriter = Default::default();
+
+ move |tok_result| match tok_result {
+ Ok(Parsed::Object(tok)) => {
+ xmlwriter = tok.write(&mut fout, xmlwriter, escaper).unwrap();
+ }
+ _ => (),
+ }
+}
+
+/// Compile a source file,
+/// writing to the provided destination path.
+///
+/// NB: Output is presently a _copy_ of the input,
+/// with formatting partially removed.
+fn compile<R: Reporter>(
+ src_path: &String,
+ dest_path: &String,
+ reporter: &mut R,
+) -> Result<(), UnrecoverableError> {
+ let dest = Path::new(&dest_path);
+ let fout = BufWriter::new(fs::File::create(dest)?);
+
+ let escaper = DefaultEscaper::default();
+
+ let mut ebuf = String::new();
+
+ fn report_err<R: Reporter>(
+ e: &RecoverableError,
+ reporter: &mut R,
+ ebuf: &mut String,
+ ) -> Result<(), UnrecoverableError> {
+ // See below note about buffering.
+ ebuf.clear();
+ writeln!(ebuf, "{}", reporter.render(e))?;
+ println!("{ebuf}");
+
+ Ok(())
+ }
+
+ // TODO: We're just echoing back out XIR,
+ // which will be the same sans some formatting.
+ let src = &mut src_reader(src_path, &escaper)?
+ .inspect(copy_xml_to(fout, &escaper))
+ .map(|result| result.map_err(RecoverableError::from));
+
+ let _ = Lower::<
+ ParsedObject<XirToken, XirError>,
+ XirToXirf<64, RefinedText>,
+ _,
+ >::lower::<_, UnrecoverableError>(src, |toks| {
+ Lower::<XirToXirf<64, RefinedText>, XirfToNir, _>::lower(toks, |snir| {
+ Lower::<XirfToNir, DesugarNir, _>::lower(snir, |nir| {
+ nir.fold(Ok(()), |x, result| match result {
+ Ok(_) => x,
+ Err(e) => {
+ report_err(&e, reporter, &mut ebuf)?;
+ x
+ }
+ })
+ })
+ })
+ })?;
+
+ match reporter.has_errors() {
+ false => Ok(()),
+ true => Err(UnrecoverableError::ErrorsDuringLowering(
+ reporter.error_count(),
+ )),
+ }
+}
+
/// Entrypoint for the compiler
-pub fn main() -> Result<(), TamecError> {
+pub fn main() -> Result<(), UnrecoverableError> {
let args: Vec<String> = env::args().collect();
let program = &args[0];
let opts = get_opts();
let usage = opts.usage(&format!("Usage: {} [OPTIONS] INPUT", program));
match parse_options(opts, args) {
- Ok(Command::Compile(input, _, output)) => {
- let source = Path::new(&input);
- if source.extension() != Some(OsStr::new("xml")) {
- panic!("{}: file format not recognized", input);
- }
+ Ok(Command::Compile(src_path, _, dest_path)) => {
+ let mut reporter = VisualReporter::new(FsSpanResolver);
- let dest = Path::new(&output);
-
- Ok(())
- .and_then(|_| {
- use std::io::{BufReader, BufWriter};
- use tamer::{
- fs::{File, PathFile},
- iter::into_iter_while_ok,
- xir::{
- reader::XmlXirReader, writer::XmlWriter,
- DefaultEscaper,
- },
- };
-
- let escaper = DefaultEscaper::default();
- let mut fout = BufWriter::new(fs::File::create(dest)?);
-
- let PathFile(_, file, ctx): PathFile<BufReader<fs::File>> =
- PathFile::open(source)?;
-
- // Parse into XIR and re-lower into XML,
- // which is similar to a copy but proves that we're able
- // to parse source files.
- into_iter_while_ok(
- XmlXirReader::new(file, &escaper, ctx),
- |toks| {
- toks.filter_map(|parsed| match parsed {
- Parsed::Object(tok) => Some(tok),
- _ => None,
- })
- .write(&mut fout, Default::default(), &escaper)
- .map_err(TamecError::from)
- },
- )?;
-
- Ok(())
- })
- .or_else(|e: TamecError| {
- let mut reporter = VisualReporter::new(FsSpanResolver);
-
- // POC: Rendering to a string ensures buffering so that we don't
- // interleave output between processes,
- // but we ought to reuse a buffer when we support multiple
- // errors.
+ compile(&src_path, &dest_path, &mut reporter).or_else(
+ |e: UnrecoverableError| {
+ // Rendering to a string ensures buffering so that we
+ // don't interleave output between processes.
let report = reporter.render(&e).to_string();
- println!("{report}\nfatal: failed to link `{}`", output);
+ println!(
+ "{report}\nfatal: failed to compile `{}`",
+ dest_path
+ );
std::process::exit(1);
- })
+ },
+ )
}
Ok(Command::Usage) => {
println!("{}", usage);
@@ -179,78 +249,169 @@ fn parse_options(opts: Options, args: Vec<String>) -> Result<Command, Fail> {
Ok(Command::Compile(input, emit, output))
}
-/// Compiler (`tamec`) error.
+/// Toplevel `tamec` error representing a failure to complete the requested
+/// operation successfully.
+///
+/// These are errors that will result in aborting execution and exiting with
+/// a non-zero status.
+/// Contrast this with [`RecoverableError`],
+/// which is reported real-time to the user and _does not_ cause the
+/// program to abort until the end of the compilation unit.
+#[derive(Debug)]
+pub enum UnrecoverableError {
+ Io(io::Error),
+ Fmt(fmt::Error),
+ XirWriterError(xir::writer::Error),
+ ErrorsDuringLowering(ErrorCount),
+}
+
+/// Number of errors that occurred during this compilation unit.
+///
+/// Let's hope that this is large enough for the number of errors you may
+/// have in your code.
+type ErrorCount = usize;
+
+/// An error that occurs during the lowering pipeline that may be recovered
+/// from to continue parsing and collection of additional errors.
///
/// This represents the aggregation of all possible errors that can occur
-/// during compile-time.
+/// during lowering.
/// This cannot include panics,
/// but efforts have been made to reduce panics to situations that
/// represent the equivalent of assertions.
+///
+/// These errors are distinct from [`UnrecoverableError`],
+/// which represents the errors that could be returned to the toplevel
+/// `main`,
+/// because these errors are intended to be reported to the user _and then
+/// recovered from_ so that compilation may continue and more errors may
+/// be collected;
+/// nobody wants a compiler that reports one error at a time.
+///
+/// Note that a recoverable error,
+/// under a normal compilation strategy,
+/// will result in an [`UnrecoverableError::ErrorsDuringLowering`] at the
+/// end of the compilation unit.
#[derive(Debug)]
-pub enum TamecError {
- Io(io::Error),
+pub enum RecoverableError {
XirParseError(ParseError<UnknownToken, xir::Error>),
- XirWriterError(xir::writer::Error),
- Fmt(fmt::Error),
+ XirfParseError(ParseError<XirToken, XirToXirfError>),
+ NirParseError(ParseError<XirfToken<RefinedText>, XirfToNirError>),
+ DesugarNirError(ParseError<SugaredNir, DesugarNirError>),
}
-impl From<io::Error> for TamecError {
+impl From<io::Error> for UnrecoverableError {
fn from(e: io::Error) -> Self {
Self::Io(e)
}
}
-impl From<ParseError<UnknownToken, xir::Error>> for TamecError {
- fn from(e: ParseError<UnknownToken, xir::Error>) -> Self {
- Self::XirParseError(e)
+impl From<fmt::Error> for UnrecoverableError {
+ fn from(e: fmt::Error) -> Self {
+ Self::Fmt(e)
}
}
-impl From<xir::writer::Error> for TamecError {
+impl From<xir::writer::Error> for UnrecoverableError {
fn from(e: xir::writer::Error) -> Self {
Self::XirWriterError(e)
}
}
-impl From<fmt::Error> for TamecError {
- fn from(e: fmt::Error) -> Self {
- Self::Fmt(e)
+impl From<ParseError<UnknownToken, xir::Error>> for RecoverableError {
+ fn from(e: ParseError<UnknownToken, xir::Error>) -> Self {
+ Self::XirParseError(e)
+ }
+}
+
+impl From<ParseError<XirToken, XirToXirfError>> for RecoverableError {
+ fn from(e: ParseError<XirToken, XirToXirfError>) -> Self {
+ Self::XirfParseError(e)
+ }
+}
+
+impl From<ParseError<XirfToken<RefinedText>, XirfToNirError>>
+ for RecoverableError
+{
+ fn from(e: ParseError<XirfToken<RefinedText>, XirfToNirError>) -> Self {
+ Self::NirParseError(e)
+ }
+}
+
+impl From<ParseError<SugaredNir, DesugarNirError>> for RecoverableError {
+ fn from(e: ParseError<SugaredNir, DesugarNirError>) -> Self {
+ Self::DesugarNirError(e)
}
}
-impl Display for TamecError {
+impl Display for UnrecoverableError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Io(e) => Display::fmt(e, f),
- Self::XirParseError(e) => Display::fmt(e, f),
- Self::XirWriterError(e) => Display::fmt(e, f),
Self::Fmt(e) => Display::fmt(e, f),
+ Self::XirWriterError(e) => Display::fmt(e, f),
+
+ // TODO: Use formatter for dynamic "error(s)"
+ Self::ErrorsDuringLowering(err_count) => {
+ write!(f, "aborting due to previous {err_count} error(s)",)
+ }
}
}
}
-impl Error for TamecError {
+impl Display for RecoverableError {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ match self {
+ Self::XirParseError(e) => Display::fmt(e, f),
+ Self::XirfParseError(e) => Display::fmt(e, f),
+ Self::NirParseError(e) => Display::fmt(e, f),
+ Self::DesugarNirError(e) => Display::fmt(e, f),
+ }
+ }
+}
+
+impl Error for UnrecoverableError {
fn source(&self) -> Option<&(dyn Error + 'static)> {
match self {
Self::Io(e) => Some(e),
- Self::XirParseError(e) => Some(e),
- Self::XirWriterError(e) => Some(e),
Self::Fmt(e) => Some(e),
+ Self::XirWriterError(e) => Some(e),
+ Self::ErrorsDuringLowering(_) => None,
}
}
}
-impl Diagnostic for TamecError {
- fn describe(&self) -> Vec<AnnotatedSpan> {
+impl Error for RecoverableError {
+ fn source(&self) -> Option<&(dyn Error + 'static)> {
match self {
- Self::XirParseError(e) => e.describe(),
+ Self::XirParseError(e) => Some(e),
+ Self::XirfParseError(e) => Some(e),
+ Self::NirParseError(e) => Some(e),
+ Self::DesugarNirError(e) => Some(e),
+ }
+ }
+}
- // TODO (will fall back to rendering just the error `Display`)
+impl Diagnostic for UnrecoverableError {
+ fn describe(&self) -> Vec<AnnotatedSpan> {
+ match self {
+ // Fall back to `Display`
_ => vec![],
}
}
}
+impl Diagnostic for RecoverableError {
+ fn describe(&self) -> Vec<AnnotatedSpan> {
+ match self {
+ Self::XirParseError(e) => e.describe(),
+ Self::XirfParseError(e) => e.describe(),
+ Self::NirParseError(e) => e.describe(),
+ Self::DesugarNirError(e) => e.describe(),
+ }
+ }
+}
+
#[cfg(test)]
mod test {
use super::*;
diff --git a/tamer/src/diagnose.rs b/tamer/src/diagnose.rs
index c2206d9..49ad22f 100644
--- a/tamer/src/diagnose.rs
+++ b/tamer/src/diagnose.rs
@@ -75,6 +75,9 @@
//!
//! See the [`report`] module for more information.
+#[macro_use]
+pub mod panic;
+
mod report;
mod resolve;
@@ -82,7 +85,7 @@ pub use report::{Reporter, VisualReporter};
pub use resolve::FsSpanResolver;
use core::fmt;
-use std::{borrow::Cow, error::Error, fmt::Display};
+use std::{borrow::Cow, convert::Infallible, error::Error, fmt::Display};
use crate::span::Span;
@@ -97,13 +100,26 @@ pub trait Diagnostic: Error + Sized {
fn describe(&self) -> Vec<AnnotatedSpan>;
}
+impl Diagnostic for Infallible {
+ fn describe(&self) -> Vec<AnnotatedSpan> {
+ // This should never actually happen unless someone is explicitly
+ // invoking this method on `Infallible`.
+ unreachable!("Infallible is not supposed to fail")
+ }
+}
+
/// Diagnostic severity level.
///
/// Levels are used both for entire reports and for styling of individual
/// [`AnnotatedSpan`]s.
///
-/// Lower levels are more severe
-/// (e.g. level 1 is the worst).
+/// Higher severity levels are represented by lower integer values
+/// (e.g. level 1 is the worst),
+/// like DEFCON levels.
+/// The rationale here is that,
+/// provided that you remember that these are 1-indexed,
+/// you do not need to know how many levels exist to know how severe it
+/// is.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Default)]
#[repr(u8)]
pub enum Level {
@@ -134,6 +150,13 @@ pub enum Level {
Help,
}
+impl Level {
+ /// Whether this error level represents an error.
+ pub fn is_error(self) -> bool {
+ self <= Self::Error
+ }
+}
+
impl Display for Level {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
diff --git a/tamer/src/diagnose/panic.rs b/tamer/src/diagnose/panic.rs
new file mode 100644
index 0000000..818cd0f
--- /dev/null
+++ b/tamer/src/diagnose/panic.rs
@@ -0,0 +1,260 @@
+// TAMER diagnostic system panics
+//
+// Copyright (C) 2014-2022 Ryan Specialty Group, LLC.
+//
+// This file is part of TAME.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+//! Panic with diagnostic information.
+//!
+//! The system will produce diagnostic information using [`Error`]s
+//! implementing [`Diagnostic`] for most cases.
+//! However,
+//! sometimes the system enters an unexpected and inconsistent state that
+//! is either not worth the effort of trying to recover from,
+//! or cannot be recovered from because it represents a bug in the
+//! compiler itself.
+//! In cases such as those,
+//! a panic may be a more suitable alternative,
+//! but panics are not able to utilize spans to present additional
+//! information that the user may use to attempt to work around the issue
+//! themselves while awaiting a fix.
+//!
+//! - The macro [`diagnostic_panic!`] acts like panic,
+//! but accepts a vector of [`AnnotatedSpan`]s
+//! (just like those produced by [`Diagnostic::describe`])
+//! as its first argument to produce a diagnostic report alongside
+//! the panic.
+//! - The [`DiagnosticPanic`] trait provides alternatives to `unwrap` and
+//! `expect` methods,
+//! and utilizes [`diagnostic_panic!`].
+//! It is implemented for common types
+//! (and will be expanded as needed).
+//!
+//! Panics produced with [`diagnostic_panic!`] will output an obnoxious
+//! message stating that the error is a bug in TAMER and should be
+//! reported.
+
+use super::{AnnotatedSpan, Diagnostic, FsSpanResolver, VisualReporter};
+use std::{
+ cell::Cell,
+ error::Error,
+ fmt::{self, Debug, Display},
+};
+
+// Macro exports are unintuitive.
+#[cfg(doc)]
+use crate::diagnostic_panic;
+
+/// The type of [`Reporter`](crate::diagnose::Reporter) used to produce
+/// reports during panic operations.
+pub type PanicReporter = VisualReporter<FsSpanResolver>;
+
+/// Container for ad-hoc diagnostic data for panics.
+///
+/// This is public only because it is needed at the expansion site of
+/// [`diagnostic_panic!`].
+/// You should not use this outside of panics.
+///
+/// It is intended to be rendered _once_,
+/// after which its [`AnnotatedSpan`] vector will be consumed and become
+/// empty.
+/// The diagnostic API doesn't take ownership over the error being
+/// described.
+pub struct DiagnosticDesc<'a>(pub String, pub Cell<Vec<AnnotatedSpan<'a>>>);
+
+impl<'a> Debug for DiagnosticDesc<'a> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "DiagnosticDesc")
+ }
+}
+
+impl<'a> Error for DiagnosticDesc<'a> {
+ fn source(&self) -> Option<&(dyn Error + 'static)> {
+ None
+ }
+}
+
+impl<'a> Display for DiagnosticDesc<'a> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ match self {
+ Self(summary, _) => write!(f, "{}", summary),
+ }
+ }
+}
+
+impl<'a> Diagnostic for DiagnosticDesc<'a> {
+ fn describe(&self) -> Vec<AnnotatedSpan> {
+ match self {
+ Self(_, desc) => desc.take(),
+ }
+ }
+}
+
+/// Produce a panic with diagnostic information and a rather obnoxious
+/// message describing this issue as a bug in TAMER.
+///
+/// The first argument is of the same form as the return value of
+/// [`Diagnostic::describe`].
+///
+/// This should be used in place of [`panic!`] whenever possible.
+/// It uses the same diagnostic system as normal errors,
+/// allowing you to produce complex reports consisting of any number of
+/// spans.
+/// Considering that this error halts the system and therefore may mask
+/// other useful errors,
+/// it is important that this provide useful information if at all
+/// possible so that the user has some chance of working around the
+/// problem and getting themselves unstuck.
+#[macro_export]
+macro_rules! diagnostic_panic {
+ ($desc_data:expr, $($panic_args:tt)*) => {
+ $crate::diagnostic_panic!(
+ @panic!, $desc_data, $($panic_args)*
+ )
+ };
+
+ (@$macro:ident!, $desc_data:expr, $($panic_args:tt)*) => {{
+ use crate::diagnose::Reporter;
+
+ let mut reporter = crate::diagnose::panic::PanicReporter::new(
+ Default::default()
+ );
+
+ let summary = format!($($panic_args)*);
+ let desc = crate::diagnose::panic::DiagnosticDesc(
+ summary,
+ std::cell::Cell::new($desc_data),
+ );
+
+ $macro!(
+ "internal error:\n{}\n{}",
+ reporter.render(&desc),
+ // Be extra obnoxious.
+ // This shouldn't ever happen except under exceedingly
+ // exceptional circumstances,
+ // so it's acceptable to make a big deal about it.
+ "\x1b[0;31m
+!!! ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ !!!
+!!! THIS IS A BUG IN TAMER !!!
+!!! ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ !!!
+!!! This message means that TAMER has encountered an !!!
+!!! unrecoverable error that forced it to terminate !!!
+!!! processing. !!!
+!!! !!!
+!!! TAMER has attempted to provide you with contextual !!!
+!!! information above that might allow you to work around !!!
+!!! this problem until it can be fixed. !!!
+!!! !!!
+!!! Please report this error, including the above !!!
+!!! diagnostic output beginning with 'internal error:'. !!!
+!!! ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ !!!
+\x1b[0m"
+ )
+ }};
+}
+
+/// Produce a panic with diagnostic information and a rather obnoxious
+/// message describing this issue as a bug in TAMER,
+/// but only if debug assertions are enabled.
+///
+/// This simply gates [`diagnostic_panic!`] behind a `debug_assertions` cfg
+/// check.
+#[macro_export]
+macro_rules! debug_diagnostic_panic {
+ ($desc_data:expr, $($panic_args:tt)*) => {
+ #[cfg(debug_assertions)]
+ $crate::diagnostic_panic!($desc_data, $($panic_args)*);
+ }
+}
+
+/// Produce a panic using [`unreachable!`] with diagnostic information and a
+/// rather obnoxious message describing this issue as a bug in TAMER.
+///
+/// This should be used in place of [`diagnostic_panic!`] wherever
+/// [`unreachable!`] would be used.
+#[macro_export]
+macro_rules! diagnostic_unreachable {
+ ($desc_data:expr, $($panic_args:tt)*) => {
+ $crate::diagnostic_panic!(
+ @unreachable!, $desc_data, $($panic_args)*
+ )
+ }
+}
+
+/// Alternatives to `unwrap` and `expect` that utilize
+/// [`diagnostic_panic!`].
+pub trait DiagnosticPanic {
+ /// Type to produce after unwrapping.
+ type Inner;
+
+ /// Attempt to return the inner value,
+ /// consuming `self`.
+ ///
+ /// This is an alternative to the usual `unwrap` method,
+ /// producing diagnostic information in the event of a failure.
+ /// See [`diagnostic_panic!`] for more information.
+ ///
+ /// # Panics
+ /// Panics if the inner value is not available.
+ /// For a custom message,
+ /// use [`DiagnosticPanic::diagnostic_expect`].
+ fn diagnostic_unwrap<'a>(self, desc: Vec<AnnotatedSpan<'a>>)
+ -> Self::Inner;
+
+ /// Attempt to return the inner value,
+ /// consuming `self`.
+ ///
+ /// This is an alternative to the usual `expect` method,
+ /// producing diagnostic information in the event of a failure.
+ /// See [`diagnostic_panic!`] for more information.
+ ///
+ /// # Panics
+ /// Panics if the inner value is not available with a custom `msg`.
+ fn diagnostic_expect<'a>(
+ self,
+ desc: Vec<AnnotatedSpan<'a>>,
+ msg: &str,
+ ) -> Self::Inner;
+}
+
+impl<T> DiagnosticPanic for Option<T> {
+ type Inner = T;
+
+ fn diagnostic_unwrap<'a>(
+ self,
+ desc: Vec<AnnotatedSpan<'a>>,
+ ) -> Self::Inner {
+ match self {
+ Some(val) => val,
+ // Same message as `Option::unwrap`
+ None => diagnostic_panic!(
+ desc,
+ "called `Option::unwrap()` on a `None` value"
+ ),
+ }
+ }
+
+ fn diagnostic_expect<'a>(
+ self,
+ desc: Vec<AnnotatedSpan<'a>>,
+ msg: &str,
+ ) -> Self::Inner {
+ match self {
+ Some(val) => val,
+ None => diagnostic_panic!(desc, "{}", msg),
+ }
+ }
+}
diff --git a/tamer/src/diagnose/report.rs b/tamer/src/diagnose/report.rs
index 644a919..e54fc87 100644
--- a/tamer/src/diagnose/report.rs
+++ b/tamer/src/diagnose/report.rs
@@ -74,6 +74,13 @@ pub trait Reporter {
/// diagnostic messages that were requested.
fn render<'d, D: Diagnostic>(&mut self, diagnostic: &'d D)
-> Report<'d, D>;
+
+ /// Whether any reports have been rendered with an error level or higher.
+ fn has_errors(&self) -> bool;
+
+ /// Number of reports with an error level or higher that have been
+ /// rendered.
+ fn error_count(&self) -> usize;
}
/// Render diagnostic report in a highly visual way.
@@ -86,12 +93,19 @@ pub trait Reporter {
/// understanding why the error occurred and how to approach resolving
/// it.
pub struct VisualReporter<R: SpanResolver> {
+ /// Span resolver.
+ ///
+ /// This is responsible for resolving a span to a filename with line and
+ /// column numbers.
resolver: R,
+
+ /// Number of reports with a severity level of error or higher.
+ err_n: usize,
}
impl<R: SpanResolver> VisualReporter<R> {
pub fn new(resolver: R) -> Self {
- Self { resolver }
+ Self { resolver, err_n: 0 }
}
}
@@ -110,8 +124,22 @@ impl<R: SpanResolver> Reporter for VisualReporter<R> {
// which is more aesthetically pleasing.
report.normalize_gutters();
+ if report.level.is_error() {
+ // Not worried about overflow panic
+ // (you have bigger problems if there are that many errors).
+ self.err_n += 1;
+ }
+
report
}
+
+ fn has_errors(&self) -> bool {
+ self.error_count() > 0
+ }
+
+ fn error_count(&self) -> usize {
+ self.err_n
+ }
}
/// Request a diagnostic description and immediately resolve the provided
@@ -508,7 +536,7 @@ impl<'s, 'd> Section<'d> {
/// gutter width is 2
/// ```
fn gutter_text_width(&self) -> usize {
- self.line_max.log10().add(1).max(2) as usize
+ self.line_max.ilog10().add(1).max(2) as usize
}
}
diff --git a/tamer/src/diagnose/report/test.rs b/tamer/src/diagnose/report/test.rs
index df1c8a6..3d49926 100644
--- a/tamer/src/diagnose/report/test.rs
+++ b/tamer/src/diagnose/report/test.rs
@@ -21,7 +21,7 @@ use super::*;
use crate::{
convert::ExpectInto,
diagnose::resolve::Column,
- span::{DUMMY_CONTEXT, DUMMY_SPAN},
+ span::dummy::{DUMMY_CONTEXT, DUMMY_SPAN},
};
use std::{io, num::NonZeroU32};
diff --git a/tamer/src/diagnose/report/test/integration.rs b/tamer/src/diagnose/report/test/integration.rs
index 2cf6188..7be5a27 100644
--- a/tamer/src/diagnose/report/test/integration.rs
+++ b/tamer/src/diagnose/report/test/integration.rs
@@ -114,33 +114,37 @@ const FILE_MANY_LINES: &[u8] = b"\
\n90\n91\n92\n93\n94\n95\n96\n97\n98\n99\
\n100";
-macro_rules! assert_report {
- ($msg:expr, $aspans:expr, $expected:expr) => {
- let mut resolver = HashMap::<Context, BufSpanResolver<_>>::new();
+fn new_sut() -> impl Reporter {
+ let mut resolver = HashMap::<Context, BufSpanResolver<_>>::new();
- let ctx_foo_bar = Context::from("foo/bar");
- let ctx_bar_baz = Context::from("bar/baz");
- let ctx_inv_utf = Context::from("invalid/utf8");
- let ctx_mny_lns = Context::from("many/lines");
+ let ctx_foo_bar = Context::from("foo/bar");
+ let ctx_bar_baz = Context::from("bar/baz");
+ let ctx_inv_utf = Context::from("invalid/utf8");
+ let ctx_mny_lns = Context::from("many/lines");
- resolver.insert(
- ctx_foo_bar,
- BufSpanResolver::new(Cursor::new(FILE_FOO_BAR), ctx_foo_bar),
- );
- resolver.insert(
- ctx_bar_baz,
- BufSpanResolver::new(Cursor::new(FILE_BAR_BAZ), ctx_bar_baz),
- );
- resolver.insert(
- ctx_inv_utf,
- BufSpanResolver::new(Cursor::new(FILE_INVALID_UTF8), ctx_inv_utf),
- );
- resolver.insert(
- ctx_mny_lns,
- BufSpanResolver::new(Cursor::new(FILE_MANY_LINES), ctx_mny_lns),
- );
+ resolver.insert(
+ ctx_foo_bar,
+ BufSpanResolver::new(Cursor::new(FILE_FOO_BAR), ctx_foo_bar),
+ );
+ resolver.insert(
+ ctx_bar_baz,
+ BufSpanResolver::new(Cursor::new(FILE_BAR_BAZ), ctx_bar_baz),
+ );
+ resolver.insert(
+ ctx_inv_utf,
+ BufSpanResolver::new(Cursor::new(FILE_INVALID_UTF8), ctx_inv_utf),
+ );
+ resolver.insert(
+ ctx_mny_lns,
+ BufSpanResolver::new(Cursor::new(FILE_MANY_LINES), ctx_mny_lns),
+ );
- let mut sut = VisualReporter::new(resolver);
+ VisualReporter::new(resolver)
+}
+
+macro_rules! assert_report {
+ ($msg:expr, $aspans:expr, $expected:expr) => {
+ let mut sut = new_sut();
assert_eq!(
sut.render(&StubError($msg.into(), $aspans)).to_string(),
@@ -589,3 +593,39 @@ error: wide gutter
"
);
}
+
+#[test]
+fn visual_reporter_tracks_errors() {
+ let sut = &mut new_sut();
+ let ctx = Context::from("error/tracking");
+
+ fn feed_aspan(sut: &mut impl Reporter, aspan: AnnotatedSpan<'static>) {
+ // We do not care about the report value;
+ // we're only interested in how it tracks errors for this test.
+ let _ = sut.render(&StubError("ignored".into(), vec![aspan]));
+ }
+
+ // We should start with no errors.
+ assert_eq!(sut.error_count(), 0);
+ assert!(!sut.has_errors());
+
+ // Help must not increment.
+ feed_aspan(sut, ctx.span(0, 1).help("no increment"));
+ assert_eq!(sut.error_count(), 0);
+ assert!(!sut.has_errors());
+
+ // Note must not increment.
+ feed_aspan(sut, ctx.span(0, 1).note("no increment"));
+ assert_eq!(sut.error_count(), 0);
+ assert!(!sut.has_errors());
+
+ // Error must increment.
+ feed_aspan(sut, ctx.span(0, 1).error("increment"));
+ assert_eq!(sut.error_count(), 1);
+ assert!(sut.has_errors());
+
+ // Internal error must increment.
+ feed_aspan(sut, ctx.span(0, 1).error("increment"));
+ assert_eq!(sut.error_count(), 2);
+ assert!(sut.has_errors());
+}
diff --git a/tamer/src/diagnose/resolve.rs b/tamer/src/diagnose/resolve.rs
index 8154b86..bf9e02f 100644
--- a/tamer/src/diagnose/resolve.rs
+++ b/tamer/src/diagnose/resolve.rs
@@ -730,6 +730,7 @@ impl Line {
/// Resolve spans by reading [`Context`]s from a filesystem.
///
/// This uses [`BufSpanResolver`].
+#[derive(Debug, Default)]
pub struct FsSpanResolver;
impl SpanResolver for FsSpanResolver {
diff --git a/tamer/src/fmt.rs b/tamer/src/fmt.rs
index 11fd846..291579b 100644
--- a/tamer/src/fmt.rs
+++ b/tamer/src/fmt.rs
@@ -221,32 +221,61 @@ pub trait ListDisplayWrapper {
/// [`ListDisplayWrapper::wrap`] may be used to produce a
/// [`Display`]-able object instead.
fn fmt<T: Display>(list: &[T], f: &mut Formatter) -> Result {
- let maxi = list.len().saturating_sub(1);
+ let lasti = list.len().saturating_sub(1);
// This can be further abstracted away using the above primitives,
// if ever we have a use.
for next in list.into_iter().enumerate() {
match next {
- (0, x) if maxi == 0 => {
- Self::Single::fmt(x, f)?;
- }
+ (i, x) => Self::fmt_nth(lasti, i, x, f)?,
+ };
+ }
- (0, x) => {
- Self::First::fmt(x, f)?;
- }
+ Ok(())
+ }
- (i, x) if maxi == i => {
- if i == 1 {
- Self::LastOfPair::fmt(x, f)?;
- } else {
- Self::LastOfMany::fmt(x, f)?;
- }
- }
+ /// Format an item as if it were the `i`th value of a list of length
+ /// `lasti+1`.
+ ///
+ /// This allows for generating list-like output without the expense of
+ /// actually producing a list.
+ /// This may be useful when values are stored in different memory
+ /// locations,
+ /// so that the displaying of those values is a problem of invoking
+ /// this method on them in the right order,
+ /// rather than collecting them just for the sake of display.
+ /// If Rust supports `const` array/Vec functions in the future,
+ /// this may not be necessary anymore,
+ /// unless we also don't want the space cost of such a
+ /// precomputation
+ /// (but it may come with performance benefits from locality).
+ #[inline]
+ fn fmt_nth<T: Display>(
+ lasti: usize,
+ i: usize,
+ item: &T,
+ f: &mut Formatter,
+ ) -> Result {
+ match (i, item) {
+ (0, x) if lasti == 0 => {
+ Self::Single::fmt(x, f)?;
+ }
- (_, x) => {
- Self::Middle::fmt(x, f)?;
+ (0, x) => {
+ Self::First::fmt(x, f)?;
+ }
+
+ (i, x) if lasti == i => {
+ if i == 1 {
+ Self::LastOfPair::fmt(x, f)?;
+ } else {
+ Self::LastOfMany::fmt(x, f)?;
}
}
+
+ (_, x) => {
+ Self::Middle::fmt(x, f)?;
+ }
}
Ok(())
@@ -389,6 +418,22 @@ impl<'a, W: ListDisplayWrapper + ?Sized, T: Display> Display
}
}
+/// Wrap a `fmt`-like function to be used as [`Display::fmt`] for this
+/// object.
+///
+/// This works around the problem of having a function expecting a
+/// [`Formatter`],
+/// but not having a [`Formatter`] to call it with.
+/// It also allows for arbitrary (compatible) functions to be used as
+/// [`Display`].
+pub struct DisplayFn<F: Fn(&mut Formatter) -> Result>(pub F);
+
+impl<F: Fn(&mut Formatter) -> Result> Display for DisplayFn<F> {
+ fn fmt(&self, f: &mut Formatter) -> Result {
+ (self.0)(f)
+ }
+}
+
#[cfg(test)]
mod test {
use super::*;
@@ -523,4 +568,50 @@ mod test {
"things (a), (b), or (c)",
);
}
+
+ #[test]
+ fn display_fn() {
+ assert_eq!(
+ DisplayFn(|f| write!(f, "test fmt")).to_string(),
+ "test fmt",
+ );
+ }
+
+ // `fmt_nth` is used by the above tests,
+ // but that's an implementation detail;
+ // we expose it as a public API so it ought to be tested too.
+ #[test]
+ fn fmt_nth() {
+ type Sut = QualConjList<"thing", "things", "or", Raw>;
+
+ assert_eq!(
+ DisplayFn(|f| Sut::fmt_nth(0, 0, &"foo", f)).to_string(),
+ "thing foo",
+ );
+
+ assert_eq!(
+ DisplayFn(|f| Sut::fmt_nth(1, 0, &"foo", f)).to_string(),
+ "things foo",
+ );
+
+ assert_eq!(
+ DisplayFn(|f| Sut::fmt_nth(1, 1, &"foo", f)).to_string(),
+ " or foo",
+ );
+
+ assert_eq!(
+ DisplayFn(|f| Sut::fmt_nth(2, 0, &"foo", f)).to_string(),
+ "things foo",
+ );
+
+ assert_eq!(
+ DisplayFn(|f| Sut::fmt_nth(2, 1, &"foo", f)).to_string(),
+ ", foo",
+ );
+
+ assert_eq!(
+ DisplayFn(|f| Sut::fmt_nth(2, 2, &"foo", f)).to_string(),
+ ", or foo",
+ );
+ }
}
diff --git a/tamer/src/ld/poc.rs b/tamer/src/ld/poc.rs
index 2216718..720cc7d 100644
--- a/tamer/src/ld/poc.rs
+++ b/tamer/src/ld/poc.rs
@@ -40,10 +40,12 @@ use crate::{
XmloAirContext, XmloAirError, XmloError, XmloReader, XmloToAir,
XmloToken,
},
- parse::{Lower, ParseError, Parsed, ParsedObject, UnknownToken},
+ parse::{
+ FinalizeError, Lower, ParseError, Parsed, ParsedObject, UnknownToken,
+ },
sym::{GlobalSymbolResolve, SymbolId},
xir::{
- flat::{XirToXirf, XirToXirfError, XirfToken},
+ flat::{Text, XirToXirf, XirToXirfError, XirfToken},
reader::XmlXirReader,
writer::{Error as XirWriterError, XmlWriter},
DefaultEscaper, Error as XirError, Escaper, Token as XirToken,
@@ -188,32 +190,32 @@ fn load_xmlo<'a, P: AsRef<Path>, S: Escaper>(
VisitOnceFile::Visited => return Ok((asg, state)),
};
+ let src = &mut XmlXirReader::new(file, escaper, ctx)
+ .map(|result| result.map_err(TameldError::from));
+
// TODO: This entire block is a WIP and will be incrementally
// abstracted away.
let (mut asg, mut state) = Lower::<
ParsedObject<XirToken, XirError>,
- XirToXirf<64>,
- >::lower::<_, TameldError>(
- &mut XmlXirReader::new(file, escaper, ctx),
- |toks| {
- Lower::<XirToXirf<64>, XmloReader>::lower(toks, |xmlo| {
- let mut iter = xmlo.scan(false, |st, rtok| match st {
- true => None,
- false => {
- *st = matches!(
- rtok,
- Ok(Parsed::Object(XmloToken::Eoh(..)))
- );
- Some(rtok)
- }
- });
-
- Lower::<XmloReader, XmloToAir>::lower_with_context(
- &mut iter,
- state,
- |air| {
- let (_, asg) =
- Lower::<XmloToAir, AirAggregate>::lower_with_context(
+ XirToXirf<4, Text>,
+ _,
+ >::lower(src, |toks| {
+ Lower::<XirToXirf<4, Text>, XmloReader, _>::lower(toks, |xmlo| {
+ let mut iter = xmlo.scan(false, |st, rtok| match st {
+ true => None,
+ false => {
+ *st =
+ matches!(rtok, Ok(Parsed::Object(XmloToken::Eoh(..))));
+ Some(rtok)
+ }
+ });
+
+ Lower::<XmloReader, XmloToAir, _>::lower_with_context(
+ &mut iter,
+ state,
+ |air| {
+ let (_, asg) =
+ Lower::<XmloToAir, AirAggregate, _>::lower_with_context(
air,
asg,
|end| {
@@ -224,12 +226,11 @@ fn load_xmlo<'a, P: AsRef<Path>, S: Escaper>(
},
)?;
- Ok(asg)
- },
- )
- })
- },
- )?;
+ Ok::<_, TameldError>(asg)
+ },
+ )
+ })
+ })?;
let mut dir: PathBuf = path.clone();
dir.pop();
@@ -283,10 +284,11 @@ pub enum TameldError {
SortError(SortError),
XirParseError(ParseError<UnknownToken, XirError>),
XirfParseError(ParseError<XirToken, XirToXirfError>),
- XmloParseError(ParseError<XirfToken, XmloError>),
+ XmloParseError(ParseError<XirfToken<Text>, XmloError>),
XmloLowerError(ParseError<XmloToken, XmloAirError>),
AirLowerError(ParseError<AirToken, AsgError>),
XirWriterError(XirWriterError),
+ FinalizeError(FinalizeError),
CycleError(Vec<Vec<SymbolId>>),
Fmt(fmt::Error),
}
@@ -309,8 +311,8 @@ impl From<ParseError<UnknownToken, XirError>> for TameldError {
}
}
-impl From<ParseError<XirfToken, XmloError>> for TameldError {
- fn from(e: ParseError<XirfToken, XmloError>) -> Self {
+impl From<ParseError<XirfToken<Text>, XmloError>> for TameldError {
+ fn from(e: ParseError<XirfToken<Text>, XmloError>) -> Self {
Self::XmloParseError(e)
}
}
@@ -333,6 +335,12 @@ impl From<ParseError<AirToken, AsgError>> for TameldError {
}
}
+impl From<FinalizeError> for TameldError {
+ fn from(e: FinalizeError) -> Self {
+ Self::FinalizeError(e)
+ }
+}
+
impl From<XirWriterError> for TameldError {
fn from(e: XirWriterError) -> Self {
Self::XirWriterError(e)
@@ -356,6 +364,7 @@ impl Display for TameldError {
Self::XmloLowerError(e) => Display::fmt(e, f),
Self::AirLowerError(e) => Display::fmt(e, f),
Self::XirWriterError(e) => Display::fmt(e, f),
+ Self::FinalizeError(e) => Display::fmt(e, f),
Self::CycleError(cycles) => {
for cycle in cycles {
writeln!(
@@ -387,7 +396,8 @@ impl Error for TameldError {
Self::XmloLowerError(e) => Some(e),
Self::AirLowerError(e) => Some(e),
Self::XirWriterError(e) => Some(e),
- Self::CycleError(..) => None,
+ Self::FinalizeError(e) => Some(e),
+ Self::CycleError(_) => None,
Self::Fmt(e) => Some(e),
}
}
@@ -401,9 +411,13 @@ impl Diagnostic for TameldError {
Self::XmloParseError(e) => e.describe(),
Self::XmloLowerError(e) => e.describe(),
Self::AirLowerError(e) => e.describe(),
+ Self::FinalizeError(e) => e.describe(),
- // TODO (will fall back to rendering just the error `Display`)
- _ => vec![],
+ Self::Io(_)
+ | Self::SortError(_)
+ | Self::XirWriterError(_)
+ | Self::CycleError(_)
+ | Self::Fmt(_) => vec![],
}
}
}
diff --git a/tamer/src/ld/xmle/xir.rs b/tamer/src/ld/xmle/xir.rs
index eb1162c..17da428 100644
--- a/tamer/src/ld/xmle/xir.rs
+++ b/tamer/src/ld/xmle/xir.rs
@@ -140,7 +140,7 @@ impl<'a> DepListIter<'a> {
self.toks.push(Token::AttrName(QN_SRC, LSPAN));
}
- self.toks_push_attr(QN_GENERATED, match src.generated {
+ self.toks_push_attr(QN_P_GENERATED, match src.generated {
true => Some(raw::L_TRUE),
false => None,
});
diff --git a/tamer/src/ld/xmle/xir/test.rs b/tamer/src/ld/xmle/xir/test.rs
index 3eed986..6deb720 100644
--- a/tamer/src/ld/xmle/xir/test.rs
+++ b/tamer/src/ld/xmle/xir/test.rs
@@ -253,7 +253,7 @@ fn test_writes_deps() -> TestResult {
Some(ident.kind().unwrap().as_sym())
);
- let generated = attrs.find(QN_GENERATED).map(|a| a.value());
+ let generated = attrs.find(QN_P_GENERATED).map(|a| a.value());
if let Some(Source {
generated: true, ..
diff --git a/tamer/src/lib.rs b/tamer/src/lib.rs
index 8f38a41..75ba6cb 100644
--- a/tamer/src/lib.rs
+++ b/tamer/src/lib.rs
@@ -18,6 +18,11 @@
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//! An incremental rewrite of TAME in Rust.
+//!
+//! There are two entry points to this system:
+//!
+//! - [`tamec`](../tamec), the TAME compiler; and
+//! - [`tameld`](../tameld), the TAME linker.
// Constant functions are still in their infancy as of the time of writing
// (October 2021).
@@ -43,13 +48,6 @@
// this can be done more verbosely in the usual way,
// or we can write our own version.
#![feature(option_get_or_insert_default)]
-// This allows for e.g. `parse::<N>(foo)`,
-// where `fn parse<const N: T>(foo: impl Trait)`.
-// Rust devs wanted more time for public testing as of the time of writing
-// (March 2022).
-// We _could_ do without,
-// but this provides a nicer API.
-#![feature(explicit_generic_args_with_impl_trait)]
// For `Try` and `FromResidual`,
// allowing us to write our own `?`-compatible types.
#![feature(try_trait_v2)]
@@ -64,7 +62,13 @@
// Convenience features that are easily replaced if not stabalized.
#![feature(nonzero_min_max)]
#![feature(nonzero_ops)]
+// Note: this is the first time TAMER was hit by a change in an unstable
+// feature,
+// when `log10` et al. were renamed to `ilog10` et al:
+// <https://github.com/rust-lang/rust/pull/100332>
#![feature(int_log)]
+// Enabled for qualified paths in `matches!`.
+#![feature(more_qualified_paths)]
// Used for const params like `&'static str` in `crate::fmt`.
// If this is not stabalized,
// then we can do without by changing the abstraction;
@@ -73,6 +77,8 @@
#![feature(adt_const_params)]
// We build docs for private items.
#![allow(rustdoc::private_intra_doc_links)]
+// For sym::prefill recursive macro `static_symbols!`.
+#![recursion_limit = "512"]
pub mod global;
@@ -89,6 +95,7 @@ pub mod fmt;
pub mod fs;
pub mod iter;
pub mod ld;
+pub mod nir;
pub mod num;
pub mod obj;
pub mod parse;
diff --git a/tamer/src/nir.rs b/tamer/src/nir.rs
new file mode 100644
index 0000000..10bea02
--- /dev/null
+++ b/tamer/src/nir.rs
@@ -0,0 +1,442 @@
+// IR that is "near" the source code.
+//
+// Copyright (C) 2014-2022 Ryan Specialty Group, LLC.
+//
+// This file is part of TAME.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+//! An IR that is "near" the source code.
+//!
+//! This IR is "near" the source code written by the user,
+//! performing only basic normalization tasks like desugaring.
+//! It takes a verbose input language and translates it into a much more
+//! concise internal representation.
+//! The hope is that most desugaring will be done by templates in the future.
+//!
+//! NIR cannot completely normalize the source input because it does not
+//! have enough information to do so---the
+//! template system requires a compile-time interpreter that is beyond
+//! the capabilities of NIR,
+//! and so a final normalization pass must be done later on in the
+//! lowering pipeline.
+//!
+//! This is a streaming IR,
+//! meaning that the equivalent AST is not explicitly represented as a
+//! tree structure in memory.
+//!
+//! NIR is lossy and does not retain enough information for code
+//! formatting---that
+//! type of operation will require a mapping between
+//! XIRF and NIR,
+//! where the latter is used to gather enough context for formatting
+//! and the former is used as a concrete representation of what the user
+//! actually typed.
+//!
+//! For more information on the parser,
+//! see [`parse`].
+//! The entry point for NIR in the lowering pipeline is exported as
+//! [`XirfToNir`].
+
+mod desugar;
+mod parse;
+
+use crate::{
+ diagnose::{Annotate, Diagnostic},
+ fmt::{DisplayWrapper, TtQuote},
+ parse::{Object, Token},
+ span::{Span, UNKNOWN_SPAN},
+ sym::SymbolId,
+ xir::{
+ attr::{Attr, AttrSpan},
+ fmt::TtXmlAttr,
+ QName,
+ },
+};
+use std::{
+ convert::Infallible,
+ error::Error,
+ fmt::{Debug, Display},
+};
+
+pub use desugar::{DesugarNir, DesugarNirError};
+pub use parse::{
+ NirParseState as XirfToNir, NirParseStateError_ as XirfToNirError,
+};
+
+use NirSymbolTy::*;
+
+/// IR that is "near" the source code,
+/// without its syntactic sugar.
+///
+/// This form contains only primitives that cannot be reasonably represented
+/// by other primitives.
+/// This is somewhat arbitrary and may change over time,
+/// but represents a balance between the level of abstraction of the IR
+/// and performance of lowering operations.
+///
+/// See [`SugaredNir`] for more information about the sugared form.
+#[derive(Debug, PartialEq, Eq)]
+pub enum PlainNir {
+ Todo,
+
+ TplParamOpen(Plain<{ TplParamIdent }>, Plain<{ DescLiteral }>),
+ TplParamClose(Span),
+ TplParamText(Plain<{ StringLiteral }>),
+ TplParamValue(Plain<{ TplParamIdent }>),
+}
+
+type Plain<const TY: NirSymbolTy> = PlainNirSymbol<TY>;
+
+impl Token for PlainNir {
+ fn ir_name() -> &'static str {
+ "Plain NIR"
+ }
+
+ /// Identifying span of a token.
+ ///
+ /// An _identifying span_ is a selection of one of the (potentially
+ /// many) spans associated with a token that is most likely to be
+ /// associated with the identity of that token.
+ fn span(&self) -> Span {
+ use PlainNir::*;
+
+ match self {
+ Todo => UNKNOWN_SPAN,
+ TplParamOpen(dfn, _) => dfn.span(),
+ TplParamClose(span) => *span,
+ TplParamText(text) => text.span(),
+ TplParamValue(ident) => ident.span(),
+ }
+ }
+}
+
+impl Object for PlainNir {}
+
+impl Display for PlainNir {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+ use PlainNir::*;
+
+ match self {
+ Todo => write!(f, "TODO"),
+ TplParamOpen(dfn, desc) => {
+ write!(f, "open template param {dfn} ({desc})")
+ }
+ TplParamClose(_span) => write!(f, "close template param"),
+ TplParamText(text) => {
+ write!(f, "open template param default text {text}")
+ }
+ TplParamValue(ident) => {
+ write!(f, "value of template param {ident}")
+ }
+ }
+ }
+}
+
+/// Syntactic sugar atop of [`PlainNir`].
+///
+/// NIR contains various syntax features that serve as mere quality-of-life
+/// conveniences for users
+/// ("sugar" to sweeten the experience).
+/// These features do not add any expressiveness to the language,
+/// and are able to be lowered into other primitives without changing
+/// its meaning.
+///
+/// The process of lowering syntactic sugar into primitives is called
+/// "desugaring" and is carried out by the [`DesugarNir`] lowering
+/// operation,
+/// producing [`PlainNir`].
+#[derive(Debug, PartialEq, Eq)]
+pub enum SugaredNir {
+ /// A primitive token that may have sugared values.
+ Todo,
+}
+
+impl Token for SugaredNir {
+ fn ir_name() -> &'static str {
+ "Sugared NIR"
+ }
+
+ fn span(&self) -> Span {
+ use SugaredNir::*;
+
+ match self {
+ Todo => UNKNOWN_SPAN,
+ }
+ }
+}
+
+impl Object for SugaredNir {}
+
+impl Display for SugaredNir {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+ use SugaredNir::*;
+
+ match self {
+ Todo => write!(f, "TODO"),
+ }
+ }
+}
+
+/// Tag representing the type of a NIR value.
+///
+/// NIR values originate from attributes,
+/// which are refined into types as enough information becomes available.
+/// Value parsing must be deferred if a value requires desugaring or
+/// metavalue expansion.
+#[derive(Debug, PartialEq, Eq)]
+#[repr(u8)]
+pub enum NirSymbolTy {
+ AnyIdent,
+ BooleanLiteral,
+ ClassIdent,
+ ClassIdentList,
+ ConstIdent,
+ DescLiteral,
+ Dim,
+ DynNodeLiteral,
+ FuncIdent,
+ IdentDtype,
+ IdentType,
+ MapTransformLiteral,
+ NumLiteral,
+ ParamDefault,
+ ParamIdent,
+ ParamName,
+ ParamType,
+ PkgPath,
+ ShortDimNumLiteral,
+ StringLiteral,
+ SymbolTableKey,
+ TexMathLiteral,
+ Title,
+ TplMetaIdent,
+ TplIdent,
+ TplParamIdent,
+ TypeIdent,
+ ValueIdent,
+}
+
+impl Display for NirSymbolTy {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+ use NirSymbolTy::*;
+
+ match self {
+ AnyIdent => write!(f, "any identifier"),
+ BooleanLiteral => write!(
+ f,
+ "boolean literal {fmt_true} or {fmt_false}",
+ fmt_true = TtQuote::wrap("true"),
+ fmt_false = TtQuote::wrap("false"),
+ ),
+ ClassIdent => write!(f, "classification identifier"),
+ ClassIdentList => {
+ write!(f, "space-delimited list of classification identifiers")
+ }
+ ConstIdent => write!(f, "constant identifier"),
+ DescLiteral => write!(f, "description literal"),
+ Dim => write!(f, "dimension declaration"),
+ DynNodeLiteral => write!(f, "dynamic node literal"),
+ FuncIdent => write!(f, "function identifier"),
+ IdentDtype => write!(f, "identifier primitive datatype"),
+ IdentType => write!(f, "identifier type"),
+ MapTransformLiteral => write!(f, "map transformation literal"),
+ NumLiteral => write!(f, "numeric literal"),
+ ParamDefault => write!(f, "param default"),
+ ParamIdent => write!(f, "param identifier"),
+ ParamName => write!(f, "param name"),
+ ParamType => write!(f, "param type"),
+ PkgPath => write!(f, "package path"),
+ ShortDimNumLiteral => {
+ write!(f, "short-hand dimensionalized numeric literal")
+ }
+ StringLiteral => write!(f, "string literal"),
+ SymbolTableKey => write!(f, "symbol table key name"),
+ TexMathLiteral => write!(f, "TeX math literal"),
+ Title => write!(f, "title"),
+ TplMetaIdent => write!(f, "template metadata identifier"),
+ TplIdent => write!(f, "template name"),
+ TplParamIdent => write!(f, "template param identifier"),
+ TypeIdent => write!(f, "type identifier"),
+ ValueIdent => write!(f, "value identifier"),
+ }
+ }
+}
+
+/// A plain (desugared) ([`SymbolId`], [`Span`]) pair representing an
+/// attribute value that may need to be interpreted within the context of
+/// a template application.
+///
+/// _This object must be kept small_,
+/// since it is used in objects that aggregate portions of the token
+/// stream,
+/// which must persist in memory for a short period of time,
+/// and therefore cannot be optimized away as other portions of the IR.
+/// As such,
+/// this does not nest enums.
+///
+/// For the sugared form that the user may have entered themselves,
+/// see [`SugaredNirSymbol`].
+#[derive(Debug, PartialEq, Eq)]
+pub enum PlainNirSymbol<const TY: NirSymbolTy> {
+ Todo(SymbolId, Span),
+}
+
+impl<const TY: NirSymbolTy> PlainNirSymbol<TY> {
+ pub fn span(&self) -> Span {
+ match self {
+ Self::Todo(_, span) => *span,
+ }
+ }
+}
+
+impl<const TY: NirSymbolTy> Display for PlainNirSymbol<TY> {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+ match self {
+ Self::Todo(sym, _) => write!(
+ f,
+ "TODO plain {TY} {fmt_sym}",
+ fmt_sym = TtQuote::wrap(sym),
+ ),
+ }
+ }
+}
+
+/// A ([`SymbolId`], [`Span`]) pair in an attribute value context that may
+/// require desugaring.
+///
+/// For more information on desugaring,
+/// see [`DesugarNir`].
+///
+/// _This object must be kept small_,
+/// since it is used in objects that aggregate portions of the token
+/// stream,
+/// which must persist in memory for a short period of time,
+/// and therefore cannot be optimized away as other portions of the IR.
+#[derive(Debug, PartialEq, Eq)]
+pub struct SugaredNirSymbol<const TY: NirSymbolTy>(SymbolId, Span);
+
+impl<const TY: NirSymbolTy> Token for SugaredNirSymbol<TY> {
+ fn ir_name() -> &'static str {
+ // TODO: Include type?
+ "Sugared NIR Symbol"
+ }
+
+ fn span(&self) -> Span {
+ match self {
+ Self(_, span) => *span,
+ }
+ }
+}
+
+impl<const TY: NirSymbolTy> Display for SugaredNirSymbol<TY> {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+ match self {
+ Self(sym, _span) => write!(
+ f,
+ "possibly-sugared {TY} {fmt_sym}",
+ fmt_sym = TtQuote::wrap(sym),
+ ),
+ }
+ }
+}
+
+// Force developer to be conscious of any changes in size;
+// see `SugaredNirSymbol` docs for more information.
+assert_eq_size!(
+ SugaredNirSymbol<{ NirSymbolTy::AnyIdent }>,
+ (SymbolId, Span)
+);
+
+#[derive(Debug, PartialEq, Eq)]
+pub enum PkgType {
+ /// Package is intended to produce an executable program.
+ ///
+ /// This is specified by the `rater` root node.
+ Prog,
+ /// Package is intended to be imported as a component of a larger
+ /// program.
+ Mod,
+}
+
+impl<const TY: NirSymbolTy> From<(SymbolId, Span)> for SugaredNirSymbol<TY> {
+ fn from((val, span): (SymbolId, Span)) -> Self {
+ Self(val, span)
+ }
+}
+
+impl<const TY: NirSymbolTy> From<Attr> for SugaredNirSymbol<TY> {
+ fn from(attr: Attr) -> Self {
+ match attr {
+ Attr(_, val, AttrSpan(_, vspan)) => (val, vspan).into(),
+ }
+ }
+}
+
+#[derive(Debug, PartialEq, Eq)]
+pub struct Literal<const S: SymbolId>;
+
+impl<const S: SymbolId> TryFrom<Attr> for Literal<S> {
+ type Error = NirAttrParseError;
+
+ fn try_from(attr: Attr) -> Result<Self, Self::Error> {
+ match attr {
+ Attr(_, val, _) if val == S => Ok(Literal),
+ Attr(name, _, aspan) => Err(NirAttrParseError::LiteralMismatch(
+ name,
+ aspan.value_span(),
+ S,
+ )),
+ }
+ }
+}
+
+impl From<Infallible> for NirAttrParseError {
+ fn from(x: Infallible) -> Self {
+ match x {}
+ }
+}
+
+type ExpectedSymbolId = SymbolId;
+
+#[derive(Debug, PartialEq, Eq)]
+pub enum NirAttrParseError {
+ LiteralMismatch(QName, Span, ExpectedSymbolId),
+}
+
+impl Error for NirAttrParseError {
+ fn source(&self) -> Option<&(dyn Error + 'static)> {
+ None
+ }
+}
+
+impl Display for NirAttrParseError {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+ match self {
+ Self::LiteralMismatch(name, _, _) => {
+ write!(f, "unexpected value for {}", TtXmlAttr::wrap(name),)
+ }
+ }
+ }
+}
+
+impl Diagnostic for NirAttrParseError {
+ fn describe(&self) -> Vec<crate::diagnose::AnnotatedSpan> {
+ match self {
+ Self::LiteralMismatch(_, span, expected) => span
+ .error(format!("expecting {}", TtQuote::wrap(expected)))
+ .into(),
+ }
+ }
+}
diff --git a/tamer/src/nir/desugar.rs b/tamer/src/nir/desugar.rs
new file mode 100644
index 0000000..ab97e63
--- /dev/null
+++ b/tamer/src/nir/desugar.rs
@@ -0,0 +1,90 @@
+// Normalized (desugared) IR that is "near" the source code
+//
+// Copyright (C) 2014-2022 Ryan Specialty Group, LLC.
+//
+// This file is part of TAME.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+//! Desugaring of [`SugaredNir`] into the normalized [`PlainNir`] form.
+//!
+//! For more information on the flavors of NIR,
+//! see [the parent module](super).
+
+mod interp;
+
+use super::{PlainNir, SugaredNir};
+use crate::{
+ diagnose::{AnnotatedSpan, Diagnostic},
+ parse::{prelude::*, NoContext},
+};
+use std::{error::Error, fmt::Display};
+
+#[derive(Debug, PartialEq, Eq, Default)]
+pub enum DesugarNir {
+ #[default]
+ Ready,
+}
+
+impl Display for DesugarNir {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+ match self {
+ Self::Ready => write!(f, "ready for next token"),
+ }
+ }
+}
+
+impl ParseState for DesugarNir {
+ type Token = SugaredNir;
+ type Object = PlainNir;
+ type Error = DesugarNirError;
+
+ fn parse_token(
+ self,
+ tok: Self::Token,
+ _: NoContext,
+ ) -> TransitionResult<Self::Super> {
+ use SugaredNir::*;
+
+ match tok {
+ Todo => Transition(self).ok(PlainNir::Todo),
+ }
+ }
+
+ fn is_accepting(&self, _: &Self::Context) -> bool {
+ self == &Self::Ready
+ }
+}
+
+#[derive(Debug, PartialEq)]
+pub enum DesugarNirError {}
+
+impl Display for DesugarNirError {
+ fn fmt(&self, _f: &mut std::fmt::Formatter) -> std::fmt::Result {
+ // No errors yet.
+ Ok(())
+ }
+}
+
+impl Error for DesugarNirError {}
+
+impl Diagnostic for DesugarNirError {
+ fn describe(&self) -> Vec<AnnotatedSpan> {
+ // No errors yet.
+ vec![]
+ }
+}
+
+#[cfg(test)]
+mod test;
diff --git a/tamer/src/nir/desugar/interp.rs b/tamer/src/nir/desugar/interp.rs
new file mode 100644
index 0000000..2ca8ada
--- /dev/null
+++ b/tamer/src/nir/desugar/interp.rs
@@ -0,0 +1,505 @@
+// Interpolation parser for desugaring NIR
+//
+// Copyright (C) 2014-2022 Ryan Specialty Group, LLC.
+//
+// This file is part of TAME.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+//! Interpolation parser for desugaring NIR.
+//!
+//! String interpolation occurs for attributes containing curly braces
+//! (`{` and `}`)
+//! during TAME's parsing phase,
+//! before template expansion.
+//! An attribute containing curly braces is referred to in TAME as an
+//! _interpolation specification_.
+//!
+//! Interpolation is used as a form of short-hand syntactic sugar for
+//! concatenation of string literals and template metavariables,
+//! whose result is then processed by the template system.
+//! For example,
+//! consider the following code:
+//!
+//! ```xml
+//! <c:value-of name="foo{@bar@}baz" />
+//! ```
+//!
+//! The string `foo{@bar@}baz` is the interpolation specification.
+//! This ends up desugaring into the [`PlainNir`] equivalent of this:
+//!
+//! ```xml
+//! <param name="@___dsgr_01@"
+//! desc="Generated from interpolated string `foo{@bar@}baz`">
+//! <text>foo</text>
+//! <param-value name="@bar@" />
+//! <text>baz</text>
+//! </param>
+//!
+//! <c:value-of name="@___dsgr_01@" />
+//! <!-- ^^^^^^^^^^^^
+//! replacement -->
+//! ```
+//!
+//! Since interpolation currently supports only string literals and template
+//! metavariables within specifications,
+//! they are only semantically valid within the context of a template
+//! definition.
+//! This desugaring process does not check for this context;
+//! errors would occur later on in the lowering pipeline.
+//!
+//! Since interpolation desugars into [`PlainNir`],
+//! and not source XML,
+//! generated `param`s will automatically be interpreted downstream in
+//! the lowering pipeline as if they were hoisted to the template
+//! definition header.
+//!
+//! If a string does not require interpolation,
+//! then it is interpreted as a literal within the context of the template
+//! system and is echoed back unchanged.
+//!
+//! Desugared Spans
+//! ---------------
+//! [`Span`]s for the generated tokens are derived from the specification
+//! string.
+//! In the above example,
+//! we have:
+//!
+//! ```xml
+//! <!--
+//! foo{@bar@}baz
+//! [-] [---] [-]
+//! A B C
+//! -->
+//!
+//! <text>foo</text>
+//! <!-- A -->
+//!
+//! <param-value name="@bar@">
+//! <!-- B -->
+//!
+//! <text>baz</text>
+//! <!-- C -->
+//! ```
+//!
+//! This means that any errors that subsequently occur due to contextual
+//! issues will be mapped back to a source location that makes sense to
+//! the user with a high level of granularity.
+
+use memchr::memchr;
+
+use super::super::{PlainNir, PlainNirSymbol};
+use crate::{
+ diagnose::{AnnotatedSpan, Diagnostic},
+ fmt::{DisplayWrapper, TtQuote},
+ parse::{
+ prelude::*,
+ util::{Expansion, SPair},
+ NoContext,
+ },
+ span::Span,
+ sym::{
+ st::quick_contains_byte, GlobalSymbolIntern, GlobalSymbolResolve,
+ SymbolId,
+ },
+};
+use std::{error::Error, fmt::Display};
+
+// Expose variants for enums defined in this module to reduce verbosity.
+use Expansion::*;
+use InterpState::*;
+
+/// A generated identifier.
+#[derive(Debug, PartialEq, Eq)]
+pub struct GenIdentSymbolId(SymbolId);
+
+/// A dereferenced [`SymbolId`] representing an interpolation specification.
+///
+/// This saves us from having to continuously dereference the symbol for
+/// each state change.
+type SpecSlice = &'static str;
+
+/// Offset within a [`SpecSlice`] to begin parsing at for the current
+/// [`InterpState`].
+type SpecOffset = usize;
+
+/// Interpolation desugaring operation.
+///
+/// This parser continuously yields the provided interpolation specification
+/// token as lookahead until it has completed its parsing,
+/// allowing it to stream without buffering expansion tokens.
+///
+/// The parser has two primary contexts:
+///
+/// 1. The outer literal context represented by [`ParseLiteralAt`]; and
+/// 2. The inner interpolation context
+/// (conceptually between curly braces)
+/// represented by [`ParseInterpAt`].
+///
+/// For more information,
+/// see the [parent module](super).
+#[derive(Debug, PartialEq, Eq, Default)]
+pub enum InterpState {
+ /// The next token will be inspected to determine whether it requires
+ /// interpolation.
+ #[default]
+ Ready,
+
+ /// Interpolation will continue in a literal context at the provided
+ /// offset relative to the start of the specification string.
+ ParseLiteralAt(SpecSlice, GenIdentSymbolId, SpecOffset),
+
+ /// Like [`ParseLiteralAt`],
+ /// except in the context of an interpolated value
+ /// (after having encountered a curly brace).
+ ParseInterpAt(SpecSlice, GenIdentSymbolId, SpecOffset),
+
+ /// Expansion has completed;
+ /// the final step is to replace the provided specification string
+ /// with a reference to the generated template param.
+ FinishSym(SpecSlice, GenIdentSymbolId),
+}
+
+impl Display for InterpState {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+ use InterpState::*;
+
+ match self {
+ Ready => write!(
+ f,
+ "expecting a new symbol to determine whether \
+ interpolation is necessary"
+ ),
+
+ ParseLiteralAt(spec, _, x) => write!(
+ f,
+ "parsing specification {fmt_spec} at offset {x} \
+ in a literal context",
+ fmt_spec = TtQuote::wrap(spec),
+ ),
+
+ ParseInterpAt(spec, _, x) => write!(
+ f,
+ "parsing specification {fmt_spec} at offset {x} \
+ in an interpolated value context",
+ fmt_spec = TtQuote::wrap(spec),
+ ),
+
+ FinishSym(spec, GenIdentSymbolId(gen)) => write!(
+ f,
+ "ready to replace specification {fmt_spec} \
+ with expanded metavariable reference {fmt_gen}",
+ fmt_spec = TtQuote::wrap(spec),
+ fmt_gen = TtQuote::wrap(gen),
+ ),
+ }
+ }
+}
+
+impl ParseState for InterpState {
+ type Token = SPair;
+ type Object = Expansion<SPair, PlainNir>;
+ type Error = InterpError;
+
+ fn parse_token(
+ self,
+ tok: Self::Token,
+ _: NoContext,
+ ) -> TransitionResult<Self> {
+ match (self, tok.into()) {
+ // When receiving a new symbol,
+ // we must make a quick determination as to whether it
+ // requires desugaring.
+ // Since the vast majority of symbols we encounter will require
+ // no interpolation,
+ // we first perform a separate check that is designed to
+ // filter out non-interpolated strings quickly,
+ // before we start to parse.
+ // Symbols that require no interpolation are simply echoed back.
+ (Ready, (sym, span)) => {
+ if needs_interpolation(sym) {
+ Self::begin_expansion(sym, span)
+ } else {
+ // No desugaring is needed.
+ Self::yield_symbol(sym, span)
+ }
+ }
+
+ // The outermost parsing context is that of the literal,
+ // where a sequence of characters up to `{` stand for
+ // themselves.
+ (ParseLiteralAt(s, gen_param, offset), (sym, span)) => {
+ if offset == s.len() {
+ // We've reached the end of the specification string.
+ // Since we're in the outermost (literal) context,
+ // we're safe to complete.
+ return Self::end_expansion(s, gen_param, sym, span);
+ }
+
+ // Note that this is the position _relative to the offset_,
+ // not the beginning of the string.
+ match s[offset..].chars().position(|ch| ch == '{') {
+ // The literal is the empty string,
+ // which is useless to output,
+ // so ignore it and proceed with parsing.
+ Some(0) => {
+ Transition(ParseInterpAt(s, gen_param, offset + 1))
+ .incomplete()
+ .with_lookahead((sym, span).into())
+ }
+
+ // Everything from the offset until the curly brace is a
+ // literal.
+ Some(rel_pos) => {
+ let end = offset + rel_pos;
+
+ let literal = s[offset..end].intern();
+ let span_text = span.slice(offset, rel_pos);
+
+ let text = PlainNir::TplParamText(
+ PlainNirSymbol::Todo(literal, span_text),
+ );
+
+ Transition(ParseInterpAt(s, gen_param, end + 1))
+ .ok(Expanded(text))
+ .with_lookahead((sym, span).into())
+ }
+
+ // The remainder of the specification is a literal.
+ None => {
+ let literal = s[offset..].intern();
+ let span_text = span.slice(offset, s.len() - offset);
+
+ let text = PlainNir::TplParamText(
+ PlainNirSymbol::Todo(literal, span_text),
+ );
+
+ // Keep in the current state but update the offset;
+ // we'll complete parsing next pass.
+ Transition(ParseLiteralAt(s, gen_param, s.len()))
+ .ok(Expanded(text))
+ .with_lookahead((sym, span).into())
+ }
+ }
+ }
+
+ // Parsing is continuing after having encountered an
+ // interpolation delimiter `{`.
+ // This is an inner context that cannot complete without being
+ // explicitly closed,
+ // and cannot be nested.
+ (ParseInterpAt(s, gen_param, offset), (sym, span)) => {
+ // TODO: Make sure offset exists, avoid panic
+ // TODO: Prevent nested `{`.
+
+ // Note that this is the position _relative to the offset_,
+ // not the beginning of the string.
+ match s[offset..].chars().position(|ch| ch == '}') {
+ Some(0) => todo!("empty interp"),
+
+ Some(rel_pos) => {
+ let end = offset + rel_pos;
+
+ // The value `@foo` in `{@foo@}`.
+ let value = s[offset..end].intern();
+
+ // Since rel_pos is 0-indexed,
+ // it is also the length of the value string.
+ let span_value = span.slice(offset, rel_pos);
+
+ let param_value = PlainNir::TplParamValue(
+ PlainNirSymbol::Todo(value, span_value),
+ );
+
+ // Continue parsing one character past the '}',
+ // back in a literal context.
+ Transition(ParseLiteralAt(s, gen_param, end + 1))
+ .ok(Expanded(param_value))
+ .with_lookahead((sym, span).into())
+ }
+
+ None => todo!("missing closing '}}'"),
+ }
+ }
+
+ // Interpolation has completed,
+ // and we're ready to replace the provided symbol
+ // (the interpolation specification)
+ // with a metavariable referencing the parameter that we just
+ // generated.
+ (FinishSym(_, GenIdentSymbolId(gen_param)), (_, span)) => {
+ Self::yield_symbol(gen_param, span)
+ }
+ }
+ }
+
+ fn is_accepting(&self, _: &Self::Context) -> bool {
+ self == &Self::Ready
+ }
+}
+
+impl InterpState {
+ /// Yield the final result of this operation in place of the original
+ /// specification string,
+ /// which may or may not have required interpolation.
+ ///
+ /// If no interpolation was required,
+ /// `sym` will be the original string;
+ /// otherwise,
+ /// `sym` ought to be a metavariable referencing the generated
+ /// template param.
+ ///
+ /// This transitions back to [`Ready`] and finally releases the
+ /// lookahead symbol.
+ fn yield_symbol(sym: SymbolId, span: Span) -> TransitionResult<Self> {
+ Transition(Ready).ok(DoneExpanding((sym, span).into()))
+ }
+
+ /// Begin expansion of an interpolation specification by generating a
+ /// new template parameter that will hold the interpolated body.
+ ///
+ /// For more information on identifier generation,
+ /// see [`gen_tpl_param_ident_at_offset`].
+ fn begin_expansion(sym: SymbolId, span: Span) -> TransitionResult<Self> {
+ let gen_param = gen_tpl_param_ident_at_offset(span);
+
+ // Description is not interned since there's no use in
+ // wasting time hashing something that will not be
+ // referenced
+ // (it's just informative for a human).
+ // Note that this means that tests cannot compare SymbolId.
+ let gen_desc = format!(
+ "Generated from interpolated string {}",
+ TtQuote::wrap(sym)
+ )
+ .clone_uninterned();
+
+ let GenIdentSymbolId(gen_param_sym) = gen_param;
+
+ let open = PlainNir::TplParamOpen(
+ PlainNirSymbol::Todo(gen_param_sym, span),
+ PlainNirSymbol::Todo(gen_desc, span),
+ );
+
+ // Begin parsing in a _literal_ context,
+ // since interpolation is most commonly utilized with literal
+ // prefixes.
+ Transition(ParseLiteralAt(sym.lookup_str(), gen_param, 0))
+ .ok(Expanded(open))
+ .with_lookahead((sym, span).into())
+ }
+
+ /// Complete expansion of an interpolation specification string.
+ ///
+ /// This closes the newly generated template param `gen_param`,
+ /// and then transitions to [`FinishSym`].
+ fn end_expansion(
+ s: SpecSlice,
+ gen_param: GenIdentSymbolId,
+ sym: SymbolId,
+ span: Span,
+ ) -> TransitionResult<Self> {
+ let close = PlainNir::TplParamClose(span);
+
+ // We have one last thing to do before we're complete,
+ // which is to perform the final replacement of the original
+ // symbol that we've been fed
+ // (the specification string).
+ Transition(FinishSym(s, gen_param))
+ .ok(Expanded(close))
+ .with_lookahead((sym, span).into())
+ }
+}
+
+/// Whether a value represented by the provided [`SymbolId`] requires
+/// interpolation.
+///
+/// _NB: This dereferences the provided [`SymbolId`] if it is dynamically
+/// allocated._
+///
+/// The provided value requires interpolation if it contains,
+/// anywhere in the string,
+/// the character [`{`].
+/// This uses [`memchr()`] on the raw byte representation of the symbol to
+/// quickly determine whether a string is only a literal and does not
+/// require any interpolation,
+/// which will be the case the vast majority of the time.
+///
+/// Since this operates on raw bytes,
+/// but we later operate on the symbol as a [`str`],
+/// it is not useful to return the located byte offset if an opening brace
+/// is found;
+/// that can be re-located quickly enough.
+#[inline]
+fn needs_interpolation(val: SymbolId) -> bool {
+ let ch = b'{';
+
+ // We can skip pre-interned symbols that we know cannot include the
+ // interpolation character.
+ // TODO: Abstract into `sym::symbol` module.
+ quick_contains_byte(val, ch)
+ .or_else(|| memchr(ch, val.lookup_str().as_bytes()).map(|_| true))
+ .unwrap_or(false)
+}
+
+/// Generate a deterministic template param identifier name that is unique
+/// relative to the offset in the source context (file) of the given
+/// [`Span`].
+///
+/// Since template params are local to the containing template,
+/// this is always safe.
+/// We are able to simply use the offset of the provided span since we will
+/// never generate more than one unique identifier at the exact same offset.
+///
+/// The identifier will include `"___dsgr"`,
+/// meaning "desugar",
+/// and serves as a unique string that can be used to track down this code
+/// that generates it.
+///
+/// Hygiene is not a concern since identifiers cannot be redeclared,
+/// so conflicts with manually-created identifiers will result in a
+/// compilation error
+/// (albeit a cryptic one);
+/// the hope is that the informally-compiler-reserved `___` convention
+/// mitigates that unlikely occurrence.
+/// Consequently,
+/// we _must_ intern to ensure that error can occur
+/// (we cannot use [`GlobalSymbolIntern::clone_uninterned`]).
+#[inline]
+fn gen_tpl_param_ident_at_offset(span: Span) -> GenIdentSymbolId {
+ GenIdentSymbolId(format!("@___dsgr_{:x}@", span.offset()).intern())
+}
+
+/// Error while desugaring an interpolation specification.
+#[derive(Debug, PartialEq)]
+pub enum InterpError {}
+
+impl Display for InterpError {
+ fn fmt(&self, _f: &mut std::fmt::Formatter) -> std::fmt::Result {
+ // No errors yet.
+ Ok(())
+ }
+}
+
+impl Error for InterpError {}
+
+impl Diagnostic for InterpError {
+ fn describe(&self) -> Vec<AnnotatedSpan> {
+ // No errors yet.
+ vec![]
+ }
+}
+
+#[cfg(test)]
+mod test;
diff --git a/tamer/src/nir/desugar/interp/test.rs b/tamer/src/nir/desugar/interp/test.rs
new file mode 100644
index 0000000..a397503
--- /dev/null
+++ b/tamer/src/nir/desugar/interp/test.rs
@@ -0,0 +1,336 @@
+// Interpolation parser for desugaring NIR
+//
+// Copyright (C) 2014-2022 Ryan Specialty Group, LLC.
+//
+// This file is part of TAME.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+use super::*;
+use crate::{
+ nir::PlainNirSymbol,
+ parse::Parsed,
+ span::dummy::{DUMMY_CONTEXT as DC, *},
+ sym::GlobalSymbolResolve,
+};
+use std::assert_matches::assert_matches;
+use Parsed::*;
+
+type Sut = InterpState;
+
+// While it'd be semantically valid to desugar a literal into a template
+// param,
+// it'd certainly be wasteful
+// (and would only be optimized away by a future lowering operation).
+// Best to just leave it be.
+#[test]
+fn does_not_desugar_literal_only() {
+ // `@bar@` is a metavariable,
+ // but it's also a literal because it's not enclosed in braces.
+ for literal in ["foo", "@bar@"] {
+ let sym = literal.into();
+ let toks = vec![SPair(sym, S1)];
+
+ assert_eq!(
+ Ok(vec![Object(DoneExpanding(SPair(sym, S1)))]),
+ Sut::parse(toks.into_iter()).collect(),
+ "literal `{literal}` must not desugar",
+ );
+ }
+}
+
+// When ending with an interpolated variable,
+// the parser should recognize that we've returned to the outer literal
+// context and permit successful termination of the specification string.
+#[test]
+fn desugars_literal_with_ending_var() {
+ let given_val = "foo{@bar@}";
+ // [-] [---]|
+ // 0 2 4 8|
+ // |B C |
+ // [--------]
+ // 0 9
+ // A
+
+ // Non-zero span offset ensures that derived spans properly consider
+ // parent offset.
+ let a = DC.span(10, 10);
+ let b = DC.span(10, 3);
+ let c = DC.span(14, 5);
+
+ let given_sym = SPair(given_val.into(), a);
+ let toks = vec![given_sym];
+
+ let GenIdentSymbolId(expect_name) = gen_tpl_param_ident_at_offset(a);
+ let expect_dfn = PlainNirSymbol::Todo(expect_name.into(), a);
+ let expect_text = PlainNirSymbol::Todo("foo".into(), b);
+ let expect_param = PlainNirSymbol::Todo("@bar@".into(), c);
+
+ let mut sut = Sut::parse(toks.into_iter());
+
+ // This is the template param generated from the interpolated string.
+ // The generated string is not interned,
+ // so we cannot match on its symbol,
+ // but that's okay since we don't entirely care what it says beyond
+ // containing the original string that it was derived from to provide
+ // helpful information to a human reader.
+ assert_matches!(
+ sut.next(),
+ Some(Ok(Object(Expanded(PlainNir::TplParamOpen(
+ dfn,
+ PlainNirSymbol::Todo(desc_str, desc_span)
+ ))))) if dfn == expect_dfn
+ && desc_str.lookup_str().contains(given_val)
+ && desc_span == a
+ );
+
+ // Note how the span associated with this is `B`,
+ // which is derived from the relevant portion of the original
+ // specification string.
+ assert_eq!(
+ sut.next(),
+ Some(Ok(Object(Expanded(PlainNir::TplParamText(expect_text)))))
+ );
+
+ // This is the actual metavariable reference,
+ // pulled out of the interpolated portion of the given value.
+ assert_eq!(
+ sut.next(),
+ Some(Ok(Object(Expanded(PlainNir::TplParamValue(expect_param))))),
+ );
+
+ // This is an object generated from user input,
+    // so the closing span has to identify what it was generated from.
+ assert_eq!(
+ sut.next(),
+ Some(Ok(Object(Expanded(PlainNir::TplParamClose(a)))))
+ );
+
+ // Finally,
+ // we replace the original provided attribute
+ // (the interpolation specification)
+ // with a metavariable reference to the generated parameter.
+ assert_matches!(
+ sut.next(),
+ Some(Ok(Object(DoneExpanding(SPair(given_replace, given_span)))))
+ if given_replace == expect_name && given_span == a
+ );
+
+ assert_eq!(sut.next(), None);
+}
+
+// This is largely the same as the above test,
+// with the literal and interpolation var reversed.
+//
+// Explanations above are omitted here.
+#[test]
+fn desugars_var_with_ending_literal() {
+ let given_val = "{@foo@}bar";
+ // |[---] [-]
+ // |1 5 7 9
+ // | B C|
+ // [--------]
+ // 0 9
+ // A
+
+ let a = DC.span(20, 10);
+ let b = DC.span(21, 5);
+ let c = DC.span(27, 3);
+
+ let given_sym = SPair(given_val.into(), a);
+ let toks = vec![given_sym];
+
+ let GenIdentSymbolId(expect_name) = gen_tpl_param_ident_at_offset(a);
+ let expect_dfn = PlainNirSymbol::Todo(expect_name.into(), a);
+ let expect_param = PlainNirSymbol::Todo("@foo@".into(), b);
+ let expect_text = PlainNirSymbol::Todo("bar".into(), c);
+
+ let mut sut = Sut::parse(toks.into_iter());
+
+ //
+ // See above test for explanations that are not repeated here.
+ //
+
+ assert_matches!(
+ sut.next(),
+ Some(Ok(Object(Expanded(PlainNir::TplParamOpen(
+ dfn,
+ PlainNirSymbol::Todo(desc_str, desc_span)
+ ))))) if dfn == expect_dfn
+ && desc_str.lookup_str().contains(given_val)
+ && desc_span == a
+ );
+
+ assert_eq!(
+ sut.next(),
+ Some(Ok(Object(Expanded(PlainNir::TplParamValue(expect_param))))),
+ );
+
+ assert_eq!(
+ sut.next(),
+ Some(Ok(Object(Expanded(PlainNir::TplParamText(expect_text)))))
+ );
+
+ assert_eq!(
+ sut.next(),
+ Some(Ok(Object(Expanded(PlainNir::TplParamClose(a)))))
+ );
+
+ assert_matches!(
+ sut.next(),
+ Some(Ok(Object(DoneExpanding(SPair(given_replace, given_span)))))
+ if given_replace == expect_name && given_span == a
+ );
+
+ assert_eq!(sut.next(), None);
+}
+
+// Combination of the above two tests.
+//
+// Explanations above are omitted here.
+#[test]
+fn desugars_many_vars_and_literals() {
+ let given_val = "foo{@bar@}baz{@quux@}";
+ // [-] [---] [-] [----]|
+ // 0 2 4 8 10 14 19|
+ // |B C D E |
+ // [-------------------]
+ // 0 20
+ // A
+
+ let a = DC.span(30, 21);
+ let b = DC.span(30, 3);
+ let c = DC.span(34, 5);
+ let d = DC.span(40, 3);
+ let e = DC.span(44, 6);
+
+ let given_sym = SPair(given_val.into(), a);
+ let toks = vec![given_sym];
+
+ let GenIdentSymbolId(expect_name) = gen_tpl_param_ident_at_offset(a);
+ let expect_dfn = PlainNirSymbol::Todo(expect_name.into(), a);
+ let expect_text1 = PlainNirSymbol::Todo("foo".into(), b);
+ let expect_param1 = PlainNirSymbol::Todo("@bar@".into(), c);
+ let expect_text2 = PlainNirSymbol::Todo("baz".into(), d);
+ let expect_param2 = PlainNirSymbol::Todo("@quux@".into(), e);
+
+ let mut sut = Sut::parse(toks.into_iter());
+
+ //
+ // See above tests for explanations that are not repeated here.
+ //
+
+ assert_matches!(
+ sut.next(),
+ Some(Ok(Object(Expanded(PlainNir::TplParamOpen(
+ dfn,
+ PlainNirSymbol::Todo(desc_str, desc_span)
+ ))))) if dfn == expect_dfn
+ && desc_str.lookup_str().contains(given_val)
+ && desc_span == a
+ );
+
+ assert_eq!(
+ Ok(vec![
+            // These two are the same as in the previous tests.
+ Object(Expanded(PlainNir::TplParamText(expect_text1))),
+ Object(Expanded(PlainNir::TplParamValue(expect_param1))),
+ // This pair repeats literals and vars further into the pattern
+ // to ensure that the parser is able to handle returning to
+ // previous states and is able to handle inputs at different
+ // offsets.
+ Object(Expanded(PlainNir::TplParamText(expect_text2))),
+ Object(Expanded(PlainNir::TplParamValue(expect_param2))),
+ ]),
+ sut.by_ref().take(4).collect(),
+ );
+
+ assert_eq!(
+ sut.next(),
+ Some(Ok(Object(Expanded(PlainNir::TplParamClose(a)))))
+ );
+
+ assert_matches!(
+ sut.next(),
+ Some(Ok(Object(DoneExpanding(SPair(given_replace, given_span)))))
+ if given_replace == expect_name && given_span == a
+ );
+
+ assert_eq!(sut.next(), None);
+}
+
+// Adjacent vars with empty literal between them.
+#[test]
+fn desugars_adjacent_interpolated_vars() {
+ let given_val = "{@foo@}{@bar@}{@baz@}";
+ // |[---] [---] [---]|
+ // |1 5 8 12 15 19|
+ // | B C D |
+ // [-------------------]
+ // 0 20
+ // A
+
+ let a = DC.span(40, 21);
+ let b = DC.span(41, 5);
+ let c = DC.span(48, 5);
+ let d = DC.span(55, 5);
+
+ let given_sym = SPair(given_val.into(), a);
+ let toks = vec![given_sym];
+
+ let GenIdentSymbolId(expect_name) = gen_tpl_param_ident_at_offset(a);
+ let expect_dfn = PlainNirSymbol::Todo(expect_name.into(), a);
+ let expect_param1 = PlainNirSymbol::Todo("@foo@".into(), b);
+ let expect_param2 = PlainNirSymbol::Todo("@bar@".into(), c);
+ let expect_param3 = PlainNirSymbol::Todo("@baz@".into(), d);
+
+ let mut sut = Sut::parse(toks.into_iter());
+
+ //
+ // See above tests for explanations that are not repeated here.
+ //
+
+ assert_matches!(
+ sut.next(),
+ Some(Ok(Object(Expanded(PlainNir::TplParamOpen(
+ dfn,
+ PlainNirSymbol::Todo(desc_str, desc_span)
+ ))))) if dfn == expect_dfn
+ && desc_str.lookup_str().contains(given_val)
+ && desc_span == a
+ );
+
+ // These are the three adjacent vars.
+ assert_eq!(
+ Ok(vec![
+ Object(Expanded(PlainNir::TplParamValue(expect_param1))),
+ Object(Expanded(PlainNir::TplParamValue(expect_param2))),
+ Object(Expanded(PlainNir::TplParamValue(expect_param3))),
+ ]),
+ sut.by_ref().take(3).collect(),
+ );
+
+ assert_eq!(
+ sut.next(),
+ Some(Ok(Object(Expanded(PlainNir::TplParamClose(a)))))
+ );
+
+ assert_matches!(
+ sut.next(),
+ Some(Ok(Object(DoneExpanding(SPair(given_replace, given_span)))))
+ if given_replace == expect_name && given_span == a
+ );
+
+ assert_eq!(sut.next(), None);
+}
diff --git a/tamer/src/nir/desugar/test.rs b/tamer/src/nir/desugar/test.rs
new file mode 100644
index 0000000..3fce72f
--- /dev/null
+++ b/tamer/src/nir/desugar/test.rs
@@ -0,0 +1,36 @@
+// Tests for NIR desugaring
+//
+// Copyright (C) 2014-2022 Ryan Specialty Group, LLC.
+//
+// This file is part of TAME.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+use super::*;
+use crate::parse::Parsed;
+
+type Sut = DesugarNir;
+
+// Given the simplicity,
+// this just really ensures that the parser terminates.
+#[test]
+fn maps_plain_nir_todo() {
+ let toks = vec![SugaredNir::Todo];
+
+ use Parsed::*;
+ assert_eq!(
+ Ok(vec![Object(PlainNir::Todo)]),
+ Sut::parse(toks.into_iter()).collect(),
+ );
+}
diff --git a/tamer/src/nir/parse.rs b/tamer/src/nir/parse.rs
new file mode 100644
index 0000000..d08af9e
--- /dev/null
+++ b/tamer/src/nir/parse.rs
@@ -0,0 +1,1871 @@
+// Normalized source IR
+//
+// Copyright (C) 2014-2022 Ryan Specialty Group, LLC.
+//
+// This file is part of TAME.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+//! NIR parser.
+//!
+//! For general information about NIR,
+//! see the [parent module](super).
+//!
+//! The entry point for this parser in the lowering pipeline is
+//! [`NirParseState`].
+//! The grammar is defined declaratively using the [`ele_parse!`]
+//! parser-generator,
+//! which yields a parser compatible with TAME's [`crate::parse`]
+//! framework.
+//!
+//! Grammar Definition
+//! ==================
+//! The grammar can be seen in the TAMER sources;
+//! if you are viewing the generated documentation,
+//! it can be viewed by clicking on "source" in the upper-right-hand
+//! corner of this page,
+//! or on each individual identifier.
+//!
+//! The grammar defines nonterminals (NTs) of two forms:
+//!
+//! 1. [XIR](crate::xir) elements with their attributes and child NTs; and
+//!   2. Sum NTs of the form `(NT₀ | NT₁ | … | NTₙ)` which match on any of
+//!        the inner NTs.
+//!
+//! Terminals are specified in element name and attribute contexts as
+//! [static QName](crate::xir::st::qname) constants of the form `QN_*`.
+//! These constants are defined in [`crate::xir::st::qname`] and allow the
+//! parser to efficiently match on element and attribute names by comparing a
+//! single 64-bit integer value,
+//! which in turn may be optimized to compare many possible QName
+//! values simultaneously.
+//!
+//! The style of the grammar is meant to be a combination of a BNF and Rust
+//! syntax.
+//!
+//! Repetition and Templates
+//! ------------------------
+//! _All NTs are implicitly defined as zero-or-more_
+//! (as with the Kleene star),
+//! and this behavior cannot be overridden.
+//! The rationale for this is somewhat complex,
+//! but the tradeoff greatly simplifies the [`ele_parse!`]
+//! parser-generator in recognition of a simple fact about NIR:
+//! it cannot determine statically whether a source file will conform to
+//! TAME's grammar when all templates are expanded.
+//!
+//! Templates require an interpreter and are expanded later in the lowering
+//! pipeline.
+//! NIR is unable to perform that expansion,
+//! and so we do the best we can do in this situation:
+//! verify that templates,
+//! when expanded,
+//! will expand into primitives known to NIR,
+//! and validate those primitives when possible.
+//! This can only go so far,
+//! given that templates can appear virtually anywhere in the source tree.
+//!
+//! Because templates are able to expand into anything that is known to
+//! NIR's grammar,
+//! NIR cannot know whether a required element has been provided or not.
+//! Consequently,
+//! we cannot require that an element be present as part of NIR's grammar,
+//! since it may have been hidden behind a template application.
+//! For the same reason,
+//! we cannot place _any_ restrictions on the number of repetitions of a
+//! particular element.
+//!
+//! The best we can do is therefore to merely validate that,
+//! whatever _is_ present,
+//! is conceivably valid at that position within the grammar.
+//! It is then the burden of a future lowering operation to validate the
+//! grammar post-expansion.
+//!
+//! What NIR therefore provides is an IR that is _closed_ under template
+//! application---this
+//! means that,
+//! when a template _is_ expanded into an application site,
+//! it _will_ expand into a sequence of parsed NIR tokens and cannot
+//! possibly expand into anything else.
+//! What the template system does with those tokens is beyond our concern.
+//!
+//! See [`TplKw`] for template tokens that are accepted anywhere.
+
+use super::{NirSymbolTy::*, *};
+use crate::{
+ ele_parse,
+ sym::st::raw::*,
+ xir::{
+ attr::Attr,
+ st::{prefix::*, qname::*},
+ },
+};
+
+type N<const TY: NirSymbolTy> = SugaredNirSymbol<TY>;
+
+ele_parse! {
+ /// Parser lowering [XIR](crate::xir) into [`SugaredNir`].
+ ///
+ /// TAME's grammar is embedded within XML.
+ /// The outer XML document has its own grammar,
+ /// which is parsed by [XIR](crate::xir);
+ /// this parser is responsible for taking the TAME grammar within
+ /// a valid XML document and parsing it into [NIR](crate::nir).
+ ///
+ /// Limitations of NIR
+ /// ------------------
+ /// It is important to understand the purposeful
+ /// (and practical)
+ /// limitations of NIR.
+ /// The grammar of NIR declares what _could acceptably appear_ in
+ /// various contexts;
+ /// it is _not_ intended to comprehensively validate what _ought_ to
+ /// appear in every conceivable context.
+ /// Because TAME is a metalanguage
+ /// (through use of its template system),
+ /// we are not able to know the full grammar of the language without
+ /// compile-time template evaluation,
+ /// and so NIR's grammar will always accept a _superset_ of all
+ /// valid programs.
+ ///
+ /// With that said,
+ /// NIR will always lower primitives,
+ /// including within template definitions.
+ /// Because of this,
+ /// all programs _are_ closed under NIR,
+ /// and we can be confident that all expanded templates will be able
+ /// to expand into a program that can be represented by NIR.
+ /// Whether or not a particular expansion is semantically valid is
+ /// beyond the scope of NIR and should be handled as part of another
+ /// lowering operation.
+ ///
+ /// See the [parent module](super) for more information.
+ ///
+ /// Superstate
+ /// ----------
+ pub enum NirParseState;
+
+ type AttrValueError = NirAttrParseError;
+ type Object = SugaredNir;
+
+ // Text and template expressions may appear at any point within the
+ // program;
+ // see [`NirParseState`] for more information.
+ [super] {
+ [text](_sym, _span) => SugaredNir::Todo,
+ TplKw
+ };
+
+ /// All valid root elements declaring valid package types.
+ ///
+ /// Historically (in XSLT),
+ /// these packages did not all share the same compiler.
+ /// This is not the case with TAMER.
+ ///
+ /// When the term "package" is used without an explicit qualifier,
+ /// it generally refers to a package containing only calculations and
+ /// classifications.
+ PkgTypeStmt := (
+ RaterStmt
+ | PackageStmt
+ | ProgramMapStmt
+ | ReturnMapStmt
+ | WorksheetStmt
+ );
+
+
+ /////////////////////////
+ ////////////////////////
+ ////
+ //// Package Stmts
+ ////
+
+ /// Like a [`PackageStmt`],
+ /// but producing an executable program.
+ ///
+ /// The term "rater" is historical,
+ /// since TAME was designed for producing insurance rating systems.
+ RaterStmt := QN_RATER {
+ @ {
+ _xmlns: (QN_XMLNS) => Literal<URI_LV_RATER>,
+ _xmlns_c: (QN_XMLNS_C) => Literal<URI_LV_CALC>,
+ _xmlns_t: (QN_XMLNS_T) => Literal<URI_LV_TPL>,
+
+ // TODO: Is this still needed?
+ // TODO: PkgName type
+ _name: (QN_NAME) => N<{PkgPath}>,
+ } => SugaredNir::Todo,
+
+ ImportStmt,
+ PkgBodyStmt,
+ };
+
+ /// Non-program package for calculations and logic.
+ ///
+ /// A package is a reusable module that can be imported by other
+ /// packages.
+ /// See [`PkgTypeStmt`] for more information on the distinction between
+ /// different package types.
+ PackageStmt := QN_PACKAGE {
+ @ {
+ _xmlns: (QN_XMLNS) => Literal<URI_LV_RATER>,
+ _xmlns_c: (QN_XMLNS_C) => Literal<URI_LV_CALC>,
+ _xmlns_t: (QN_XMLNS_T) => Literal<URI_LV_TPL>,
+
+ // TODO: Having trouble getting rid of `@xmlns:lv` using Saxon
+ // for `progui-pkg`,
+ // so just allow for now.
+ // It can't actually be used on nodes.
+ _xmlns_lv: (QN_XMLNS_LV?) => Option<Literal<URI_LV_RATER>>,
+
+ _id: (QN_ID?) => Option<N<{PkgPath}>>,
+ _title: (QN_TITLE?) => Option<N<{Title}>>,
+ _desc: (QN_DESC?) => Option<N<{DescLiteral}>>,
+ // TODO: When can we get rid of this?
+ _core: (QN_CORE?) => Option<N<{BooleanLiteral}>>,
+ _program: (QN_PROGRAM?) => Option<N<{BooleanLiteral}>>,
+
+ // TODO: Can this go away now?
+ _name: (QN_NAME?) => Option<N<{PkgPath}>>,
+ } => SugaredNir::Todo,
+
+ ImportStmt,
+ PkgBodyStmt,
+ };
+
+ /// Import another package's symbol table into this one.
+ ///
+ /// Imports allow referencing identifiers from another package and allow
+ /// for composing larger systems out of smaller components.
+ ImportStmt := QN_IMPORT {
+ @ {
+ _pkg: (QN_PACKAGE) => N<{PkgPath}>,
+ _export: (QN_EXPORT?) => Option<N<{BooleanLiteral}>>,
+ } => SugaredNir::Todo,
+ };
+
+ /// A statement that is accepted within the body of a package.
+ ///
+ /// The parent context for these statements is most often
+ /// [`PackageStmt`].
+ PkgBodyStmt := (
+ ExternStmt
+ | ParamStmt
+ | ConstStmt
+ | ClassifyStmt
+ | RateStmt
+ | RateEachStmt
+ | TypedefStmt
+ | YieldStmt
+ | SectionStmt
+ | TemplateStmt
+ | FunctionStmt
+ );
+
+ /// Statements that are valid within the context of a [`PkgBodyStmt`]
+ /// and may be directly referenced within the body of a template.
+ ///
+ /// See [`AnyStmtOrExpr`] for more information on why this is needed.
+ PkgStmtInner := (
+ ConstStmtBody
+ | InnerTypedefStmt
+ );
+
+ /// Declare a symbol that must be defined in some other package.
+ ///
+ /// Externs are effectively the same concept as in C---they
+ /// declare symbols that we /expect/ to exist at some point,
+ /// but we do not know where they will be defined.
+ /// The linker will verify,
+ /// while linking the program,
+ /// that /at most one/ other package provides a definition for this
+ /// symbol and that the definition is compatible with this
+ /// declaration.
+ ExternStmt := QN_EXTERN {
+ @ {
+ _name: (QN_NAME) => N<{AnyIdent}>,
+ _ty: (QN_TYPE) => N<{IdentType}>,
+ _dtype: (QN_DTYPE?) => Option<N<{IdentDtype}>>,
+ _dim: (QN_DIM) => N<{NumLiteral}>,
+ _parent: (QN_PARENT?) => Option<N<{AnyIdent}>>,
+ _yields: (QN_YIELDS?) => Option<N<{ValueIdent}>>,
+ } => SugaredNir::Todo,
+ };
+
+ /// Define an input parameter accepting data from an external system.
+ ///
+ /// Parameters are generally populated via a map,
+ /// such as [`ProgramMapStmt`].
+ ParamStmt := QN_PARAM {
+ @ {
+ _name: (QN_NAME) => N<{ParamName}>,
+ _ty: (QN_TYPE) => N<{ParamType}>,
+ _desc: (QN_DESC) => N<{DescLiteral}>,
+ // This is a misnomer.
+ _set: (QN_SET?) => Option<N<{Dim}>>,
+ _default: (QN_DEFAULT?) => Option<N<{ParamDefault}>>,
+ _sym: (QN_SYM?) => Option<N<{TexMathLiteral}>>,
+ } => SugaredNir::Todo,
+ };
+
+ /// Associate static data with an identifier.
+ ///
+ /// Constants may be associated with scalar, vector, or matrix values.
+ /// Since all values in TAME are immutable,
+ /// constants are a way to denote values that are entirely hard-coded
+ /// rather than being derived from some external input.
+ ///
+ /// In the future,
+ /// constants ought to be defined as expressions that can be evaluated
+ /// at compile-time,
+ /// and re-use that familiar syntax.
+ ConstStmt := QN_CONST {
+ @ {
+ _name: (QN_NAME) => N<{ConstIdent}>,
+ _desc: (QN_DESC) => N<{DescLiteral}>,
+ _value: (QN_VALUE?) => Option<N<{NumLiteral}>>,
+ _values: (QN_VALUES?) => Option<N<{ShortDimNumLiteral}>>,
+ // TODO: deprecate?
+ _ty: (QN_TYPE?) => Option<N<{TypeIdent}>>,
+ _sym: (QN_SYM?) => Option<N<{TexMathLiteral}>>,
+ // TODO: Misnomer
+ _set: (QN_SET?) => Option<N<{Dim}>>,
+ } => SugaredNir::Todo,
+
+ ConstStmtBody,
+ };
+
+ /// Body of a [`ConstStmt`] defining a vector value or a matrix row.
+ ///
+ /// Scalar constants utilize [`QN_VALUE`] instead of this body.
+ ///
+ /// See also [`QN_VALUES`],
+ /// which can be used as a short-hand form of this body.
+ ConstStmtBody := (ConstMatrixRow | ConstVectorItem);
+
+ /// Constant matrix row definition.
+ ///
+ /// TODO: The use of [`QN_SET`] is a terrible misnomer representing
+ /// dimensionality and will be changed in future versions.
+ ConstMatrixRow := QN_SET {
+ @ {
+ _desc: (QN_DESC) => N<{DescLiteral}>,
+ } => SugaredNir::Todo,
+
+ ConstVectorItem,
+ };
+
+ /// Constant vector scalar item definition.
+ ConstVectorItem := QN_ITEM {
+ @ {
+ _value: (QN_VALUE) => N<{NumLiteral}>,
+ _desc: (QN_DESC) => N<{DescLiteral}>,
+ } => SugaredNir::Todo,
+ };
+
+ /// Define a classification and associate it with an identifier.
+ ///
+ /// A classification is a logic expression yielding a boolean result
+ /// with the dimensionality matching the largest dimensionality of its
+ /// inputs.
+ ClassifyStmt := QN_CLASSIFY {
+ @ {
+ _name: (QN_AS) => N<{ClassIdent}>,
+ _desc: (QN_DESC) => N<{DescLiteral}>,
+ _any: (QN_ANY?) => Option<N<{BooleanLiteral}>>,
+ _yields: (QN_YIELDS?) => Option<N<{ValueIdent}>>,
+ _sym: (QN_SYM?) => Option<N<{TexMathLiteral}>>,
+ _terminate: (QN_TERMINATE?) => Option<N<{BooleanLiteral}>>,
+ } => SugaredNir::Todo,
+
+ LogExpr,
+ };
+
+ /// Define a calculation and associate it with an identifier.
+ ///
+ /// The term "rate" is intended as a verb,
+ /// and represents an arbitrary calculation;
+ /// the term originates from TAME's history as an insurance rating
+ /// system.
+ /// This will eventually be renamed to a more general term.
+ RateStmt := QN_RATE {
+ @ {
+ _class: (QN_CLASS?) => Option<N<{ClassIdent}>>,
+ _no: (QN_NO?) => Option<N<{ClassIdentList}>>,
+ _yields: (QN_YIELDS) => N<{ValueIdent}>,
+ _desc: (QN_DESC?) => Option<N<{DescLiteral}>>,
+ _sym: (QN_SYM?) => Option<N<{TexMathLiteral}>>,
+
+ // TODO: This is still recognized by the XSLT-based compiler,
+ // so we need to support it until it's removed.
+ _gentle_no: (QN_GENTLE_NO?) => Option<N<{BooleanLiteral}>>,
+
+ // TODO: We'll have private-by-default later.
+ // This is a kludge.
+ _local: (QN_LOCAL?) => Option<N<{BooleanLiteral}>>,
+ } => SugaredNir::Todo,
+
+ CalcExpr,
+ };
+
+ /// Define a calculation that maps a calculation to each item of a
+ /// vector,
+ /// and associate it with an identifier.
+ ///
+ /// This expands into an equivalent [`RateStmt`] with a nested
+ /// [`SumExpr`] serving as the item-wise map.
+ RateEachStmt := QN_RATE_EACH {
+ @ {
+ _class: (QN_CLASS) => N<{ClassIdentList}>,
+ _no: (QN_NO?) => Option<N<{ClassIdentList}>>,
+ _generates: (QN_GENERATES?) => Option<N<{ValueIdent}>>,
+ _index: (QN_INDEX) => N<{ValueIdent}>,
+ _yields: (QN_YIELDS?) => Option<N<{ValueIdent}>>,
+ _sym: (QN_SYM?) => Option<N<{TexMathLiteral}>>,
+ _gensym: (QN_GENSYM?) => Option<N<{TexMathLiteral}>>,
+ } => SugaredNir::Todo,
+
+ CalcExpr,
+ };
+
+ /// Define a new type that restricts the domain of data.
+ TypedefStmt := QN_TYPEDEF {
+ @ {
+ _name: (QN_NAME) => N<{TypeIdent}>,
+ _desc: (QN_DESC) => N<{DescLiteral}>,
+ _sym: (QN_SYM?) => Option<N<{TexMathLiteral}>>,
+ } => SugaredNir::Todo,
+
+ InnerTypedefStmt,
+ };
+
+ /// Body of a [`TypedefStmt`].
+ InnerTypedefStmt := (BaseTypeStmt | EnumStmt | UnionStmt);
+
+ /// Indicate that the type is defined by the TAME compiler.
+ ///
+ /// This is used for primitives and allows for core types to be exposed
+ /// to the user.
+ BaseTypeStmt := QN_BASE_TYPE {
+ @ {} => SugaredNir::Todo,
+ };
+
+ /// Define an enumerated type.
+ ///
+ /// Enums are types that have an explicit set of values,
+ /// each with associated constant identifiers.
+ EnumStmt := QN_ENUM {
+ @ {
+ _ty: (QN_TYPE) => N<{TypeIdent}>,
+ } => SugaredNir::Todo,
+
+ ItemEnumStmt,
+ };
+
+ /// Define an item of the domain of an enumerated type and associate it
+ /// with a constant identifier.
+ ItemEnumStmt := QN_ITEM {
+ @ {
+ _name: (QN_NAME) => N<{ConstIdent}>,
+ _value: (QN_VALUE) => N<{NumLiteral}>,
+ _desc: (QN_DESC) => N<{DescLiteral}>,
+ } => SugaredNir::Todo,
+ };
+
+ /// Define a type whose domain is the union of the domains of multiple
+ /// other types.
+ UnionStmt := QN_UNION {
+ @ {} => SugaredNir::Todo,
+
+ TypedefStmt,
+ };
+
+ /// A final numerical value to be yielded by a program.
+ ///
+ /// This value has historical significance,
+ /// but is slowly being deprecated.
+ /// Any number of values can be returned to the caller via a return map
+ /// (see [`ReturnMapStmt`]).
+ ///
+ /// This is being replaced with the `__yield__` template in `core`
+ /// (this statement predates the template system in TAME).
+ YieldStmt := QN_YIELD {
+ @ {} => SugaredNir::Todo,
+
+ CalcExpr,
+ };
+
+ /// Declare that the body of this statement ought to be delimited from
+ /// the surrounding definitions with a heading when visualized.
+ ///
+ /// This is intended primarily for documentation,
+ /// and serves as an alternative to using packages for sectioning.
+ /// Since definitions in TAME are independent from the order of
+ /// execution of the resulting executable,
+ /// definitions tend to be linear and can sometimes benefit from
+ /// grouping for organization and to help guide the reader.
+ ///
+ /// Otherwise,
+ /// the body of a section is the same as that of [`PackageStmt`],
+ /// with the exception of imports,
+ /// which must appear outside of sections.
+ SectionStmt := QN_SECTION {
+ @ {
+ _title: (QN_TITLE) => N<{Title}>,
+ } => SugaredNir::Todo,
+
+ PkgBodyStmt,
+ };
+
+ /// Define a function and associate it with an identifier.
+ FunctionStmt := QN_FUNCTION {
+ @ {
+ _name: (QN_NAME) => N<{FuncIdent}>,
+ _desc: (QN_DESC) => N<{DescLiteral}>,
+ _sym: (QN_SYM?) => Option<N<{TexMathLiteral}>>,
+ } => SugaredNir::Todo,
+
+ FunctionParamStmt,
+ CalcExpr,
+ };
+
+ /// Define a function parameter and associate it with an identifier that
+ /// is scoped to the function body.
+ FunctionParamStmt := QN_PARAM {
+ @ {
+ _name: (QN_NAME) => N<{ParamIdent}>,
+ _ty: (QN_TYPE) => N<{TypeIdent}>,
+ // _TODO: This is a misnomer.
+ _set: (QN_SET?) => Option<N<{Dim}>>,
+ _desc: (QN_DESC) => N<{DescLiteral}>,
+ } => SugaredNir::Todo,
+ };
+
+
+ /////////////////////////
+ ////////////////////////
+ ////
+ //// Logic Expressions
+ ////
+
+ /// A logic expression.
+ ///
+ /// See _The TAME Programming Language_ document for a formal definition
+ /// of this subsystem and its syntax.
+ LogExpr := (MatchExpr | AnyExpr | AllExpr);
+
+ /// Scalar value predicate as part of a logic expression.
+ ///
+ /// The dimensionality of the expression will be automatically
+ /// determined by the dimensionality of the matches' [`@on`](QN_ON).
+ MatchExpr := QN_MATCH {
+ @ {
+ _on: (QN_ON) => N<{ValueIdent}>,
+ _value: (QN_VALUE?) => Option<N<{ValueIdent}>>,
+ _index: (QN_INDEX?) => Option<N<{ValueIdent}>>,
+ _anyof: (QN_ANY_OF?) => Option<N<{TypeIdent}>>,
+ } => SugaredNir::Todo,
+
+ CalcPredExpr,
+ };
+
+ /// Logical disjunction (∨).
+ ///
+ /// This represents an expression that matches when _any_ of its inner
+ /// [`LogExpr`] expressions match.
+ AnyExpr := QN_ANY {
+ @ {} => SugaredNir::Todo,
+
+ LogExpr,
+ };
+
+ /// Logical conjunction (∧).
+ ///
+ /// This represents an expression that matches when _all_ of its inner
+ /// [`LogExpr`] expressions match.
+ AllExpr := QN_ALL {
+ @ {} => SugaredNir::Todo,
+
+ LogExpr,
+ };
+
+
+ /////////////////////////
+ ////////////////////////
+ ////
+ //// Calculations
+ ////
+
+ /// An expression producing a scalar result.
+ ///
+ /// Some expressions may support binding to additional identifiers.
+ CalcExpr := (
+ SumExpr
+ | ProductExpr
+ | QuotientExpr
+ | ExptExpr
+ | ValueOfExpr
+ | ConstExpr
+ | VectorExpr
+ | CasesExpr
+ | CeilExpr
+ | FloorExpr
+ | LengthOfExpr
+ | LetExpr
+ | ApplyExpr
+ | RecurseExpr
+ | ConsExpr
+ | CarExpr
+ | CdrExpr
+ );
+
+ /// Expressions that are valid within the context of one or more
+ /// [`CalcExpr`] and may be directly referenced within the body of a
+ /// template.
+ ///
+ /// See [`AnyStmtOrExpr`] for more information on why this is needed.
+ CalcExprInner := (
+ CalcPredExpr
+ | CaseExpr
+ | OtherwiseExpr
+ | LetValues
+ | LetValue
+ | WhenExpr
+ | ApplyArg
+ );
+
+ /// Summation (Σ) expression.
+ ///
+ /// When using [`@of`](QN_OF),
+ /// summation can also be used to produce a generator where each
+ /// iteration over `@of` yields a corresponding element in the vector
+ /// identified by [`@generates`](QN_GENERATES).
+ ///
+ /// Summation is generated automatically by [`RateEachStmt`].
+ SumExpr := QN_C_SUM {
+ @ {
+ _of: (QN_OF?) => Option<N<{ValueIdent}>>,
+ _generates: (QN_GENERATES?) => Option<N<{ValueIdent}>>,
+ _index: (QN_INDEX?) => Option<N<{ValueIdent}>>,
+ _desc: (QN_DESC?) => Option<N<{DescLiteral}>>,
+ _label: (QN_LABEL?) => Option<N<{DescLiteral}>>,
+ _sym: (QN_SYM?) => Option<N<{TexMathLiteral}>>,
+ _dim: (QN_DIM?) => Option<N<{Dim}>>,
+ } => SugaredNir::Todo,
+
+ WhenExpr,
+ CalcExpr,
+ };
+
+ /// Product (Π) expression.
+ ///
+ /// When using [`@of`](QN_OF),
+ /// product can also be used to produce a generator where each
+ /// iteration over `@of` yields a corresponding element in the vector
+ /// identified by [`@generates`](QN_GENERATES).
+ ProductExpr := QN_C_PRODUCT {
+ @ {
+ _of: (QN_OF?) => Option<N<{ValueIdent}>>,
+ _generates: (QN_GENERATES?) => Option<N<{ValueIdent}>>,
+ _index: (QN_INDEX?) => Option<N<{ValueIdent}>>,
+ _desc: (QN_DESC?) => Option<N<{DescLiteral}>>,
+ _label: (QN_LABEL?) => Option<N<{DescLiteral}>>,
+ _dot: (QN_DOT?) => Option<N<{BooleanLiteral}>>,
+ _sym: (QN_SYM?) => Option<N<{TexMathLiteral}>>,
+ _dim: (QN_DIM?) => Option<N<{Dim}>>,
+ } => SugaredNir::Todo,
+
+ WhenExpr,
+ CalcExpr,
+ };
+
+ /// Quotient (÷) expression.
+ ///
+ /// Traditionally,
+ /// TAME expected quotients to contain a numerator and a denominator
+ /// as only two [`CalcExpr`] expressions
+ /// (though either could be a [`QuotientExpr`] as well).
+ /// TAMER will be relaxing that restriction.
+ QuotientExpr := QN_C_QUOTIENT {
+ @ {
+ _label: (QN_LABEL?) => Option<N<{DescLiteral}>>,
+ } => SugaredNir::Todo,
+
+ CalcExpr,
+ };
+
+ /// Exponentiation (_xʸ_) expression.
+ ///
+ /// The first [`CalcExpr`] will be raised to the power of the second
+ /// [`CalcExpr`],
+ /// which will be raised to the power of any third,
+ /// and so on.
+ /// Traditionally,
+ /// TAME expected only a base and an exponent
+ /// (respectively),
+ /// but TAMER will be relaxing that restriction.
+ ExptExpr := QN_C_EXPT {
+ @ {} => SugaredNir::Todo,
+ CalcExpr,
+ };
+
+ /// Expression yielding a scalar value of the provided identifier.
+ ///
+ /// The identifier is named by [`@name`](QN_NAME),
+ /// with vectors requiring an [`@index`](QN_INDEX).
+ /// Matrices require use of a nested [`IndexExpr`] qualifier to resolve
+ /// a scalar.
+ ValueOfExpr := QN_C_VALUE_OF {
+ @ {
+ _name: (QN_NAME) => N<{ValueIdent}>,
+ _index: (QN_INDEX?) => Option<N<{ValueIdent}>>,
+ _label: (QN_LABEL?) => Option<N<{DescLiteral}>>,
+ } => SugaredNir::Todo,
+
+ IndexExpr,
+ WhenExpr,
+ };
+
+ /// Expression qualifying an index of a parent expression.
+ ///
+ /// The result of the inner [`CalcExpr`] is used as a subscript of the
+ /// parent expression.
+ /// Sibling [`IndexExpr`]s evaluate to nested subscripts where the
+ /// sibling applies to the result of the previous index operation
+ /// such that **M**_ⱼ,ₖ_ ≡ (**M**_ⱼ_)_ₖ_.
+ IndexExpr := QN_C_INDEX {
+ @ {
+ _label: (QN_LABEL?) => Option<N<{DescLiteral}>>,
+ } => SugaredNir::Todo,
+ CalcExpr,
+ };
+
+ /// Expression yielding a constant scalar value.
+ ConstExpr := QN_C_CONST {
+ @ {
+ _value: (QN_VALUE) => N<{NumLiteral}>,
+ // TODO: Description was historically required to avoid magic
+ // values,
+ // but we now have short-hand constants which do not require
+ // descriptions.
+ // We should probably require both or neither,
+ // but requiring `c:value-of` short-hand wouldn't be
+ // the responsibility of NIR,
+ // so perhaps then neither should be.
+ _desc: (QN_DESC?) => Option<N<{DescLiteral}>>,
+ // _TODO: deprecate?
+ _ty: (QN_TYPE?) => Option<N<{TypeIdent}>>,
+ } => SugaredNir::Todo,
+
+ WhenExpr,
+ };
+
+ /// Ceiling (⌈_x_⌉) expression.
+ CeilExpr := QN_C_CEIL {
+ @ {
+ _label: (QN_LABEL?) => Option<N<{DescLiteral}>>,
+ } => SugaredNir::Todo,
+ CalcExpr,
+ };
+
+ /// Floor (⌊_x_⌋) expression.
+ FloorExpr := QN_C_FLOOR {
+ @ {
+ _label: (QN_LABEL?) => Option<N<{DescLiteral}>>,
+ } => SugaredNir::Todo,
+ CalcExpr,
+ };
+
+ /// An expression that conditionally evaluates to sub-expressions
+ /// depending on a list of predicates.
+ ///
+ /// Individual cases are evaluated in order,
+ /// and the first case whose predicates
+ /// (also called "guards")
+ /// are satisfied will have its expression evaluated and yielded as
+ /// the result of the entire [`CasesExpr`].
+ ///
+ /// If no predicates match,
+ /// [`OtherwiseExpr`] is evaluated,
+ /// if present,
+ /// otherwise the value `0` is yielded.
+ CasesExpr := QN_C_CASES {
+ @ {
+ _label: (QN_LABEL?) => Option<N<{DescLiteral}>>,
+ } => SugaredNir::Todo,
+
+ CaseExpr,
+ OtherwiseExpr,
+ };
+
+ /// A predicated case of a [`CasesExpr`] with an associated
+ /// [`CalcExpr`].
+ ///
+ /// Cases are evaluated in the order in which they appear.
+ /// If all of the [`WhenExpr`]s evaluate truthfully,
+ /// then the inner [`CalcExpr`] will be evaluated and its result
+ /// yielded as the value of this expression
+ /// (and therefore the result of the parent [`CasesExpr`]).
+ /// Otherwise,
+ /// evaluation continues with the next sibling case,
+ /// if any.
+ CaseExpr := QN_C_CASE {
+ @ {
+ _label: (QN_LABEL?) => Option<N<{DescLiteral}>>,
+ } => SugaredNir::Todo,
+
+ WhenExpr,
+ CalcExpr,
+ };
+
+ /// A case of a [`CasesExpr`] that always matches.
+ ///
+ /// This should be used as a catch-all when no sibling [`CaseExpr`]
+ /// matches.
+ /// The inner [`CalcExpr`] will be evaluated and its result yielded as
+ /// the result of this expression
+ /// (and therefore the result of the parent [`CasesExpr`]).
+ ///
+ /// In absence of this expression,
+ /// [`CasesExpr`] may fall through with no matching expressions and
+ /// yield `0`.
+ /// If this behavior is unclear within a given context,
+ /// then [`OtherwiseExpr`] ought to be used to make the behavior
+ /// explicit.
+ OtherwiseExpr := QN_C_OTHERWISE {
+ @ {
+ _label: (QN_LABEL?) => Option<N<{DescLiteral}>>,
+ } => SugaredNir::Todo,
+
+ CalcExpr,
+ };
+
+ /// Length of a vector (|**v**|).
+ ///
+ /// This also yields the number of rows of a matrix,
+ /// which are vectors of vectors.
+ /// It is not defined for scalars.
+ LengthOfExpr := QN_C_LENGTH_OF {
+ @ {} => SugaredNir::Todo,
+ CalcExpr,
+ };
+
+ /// Let expression.
+ ///
+ /// This is equivalent to a let expression in the Lisp family of
+ /// languages,
+ /// where the inner [`LetValues`] defines a set of mutually
+ /// independent expressions whose associated identifiers are
+ /// lexically scoped to the inner [`CalcExpr`].
+ /// The result of the let expression is the result of the inner
+ /// [`CalcExpr`].
+ LetExpr := QN_C_LET {
+ @ {} => SugaredNir::Todo,
+ LetValues,
+ CalcExpr,
+ };
+
+ /// A set of mutually independent expressions and associated identifiers
+ /// to be lexically scoped to the sibling [`CalcExpr`].
+ ///
+ /// See [`LetExpr`] for more information.
+ LetValues := QN_C_VALUES {
+ @ {} => SugaredNir::Todo,
+ LetValue,
+ };
+
+ /// An expression bound to an associated identifier that is lexically
+ /// scoped to a parent [`LetValues`]' sibling [`CalcExpr`].
+ ///
+ /// A value cannot observe sibling values,
+ /// but it can observe values of an ancestor [`LetExpr`] that is not
+ /// its parent.
+ LetValue := QN_C_VALUE {
+ @ {
+ _name: (QN_NAME) => N<{ParamIdent}>,
+ _ty: (QN_TYPE) => N<{TypeIdent}>,
+ // Misnomer
+ _set: (QN_SET?) => Option<N<{Dim}>>,
+ _desc: (QN_DESC?) => Option<N<{DescLiteral}>>,
+ } => SugaredNir::Todo,
+
+ CalcExpr,
+ };
+
+ /// An expression yielding a vector consisting of each of its child
+ /// expressions' values as respective items.
+ VectorExpr := QN_C_VECTOR {
+ @ {
+ _label: (QN_LABEL?) => Option<N<{DescLiteral}>>,
+ } => SugaredNir::Todo,
+
+ CalcExpr,
+ };
+
+ /// Function application.
+ ///
+ /// The value of the expression is the return value of the function
+ /// applied to its argument list [`ApplyArg`].
+ ///
+ /// The attribute [`@name`](QN_NAME) contains the name of the function
+ /// to apply.
+ /// All other arguments are desugared into child [`ApplyArg`]s with a
+ /// body [`ValueOfExpr`] such that `α="x"` expands into
+ /// `<`[`c:arg`](QN_C_ARG)` name="α"><`[`c:value-of`](QN_C_VALUE_OF)
+ /// `name="x" /></c:arg>`.
+ ApplyExpr := QN_C_APPLY {
+ @ {} => SugaredNir::Todo,
+
+ [attr](_attr) => SugaredNir::Todo,
+
+ ApplyArg,
+ };
+
+ /// Argument for function application.
+ ///
+ /// Alternatively,
+ /// the parent element [`ApplyExpr`] may contain short-hand arguments
+ /// as attributes.
+ ApplyArg := QN_C_ARG {
+ @ {
+ _name: (QN_NAME) => N<{ParamIdent}>,
+ } => SugaredNir::Todo,
+
+ CalcExpr,
+ };
+
+ /// Function application recursing on the parent [`ApplyExpr`].
+ ///
+ /// This expression desugars into an [`ApplyExpr`] with the same name as
+ /// the parent [`ApplyExpr`] and copies all parent [`ApplyArg`]
+ /// expressions.
+ /// Any child [`ApplyArg`] of this expression will override the
+ /// arguments of the parent,
+ /// allowing for concise recursion in terms of only what has changed
+ /// in that recursive step.
+ RecurseExpr := QN_C_RECURSE {
+ @ {} => SugaredNir::Todo,
+
+ [attr](_attr) => SugaredNir::Todo,
+
+ ApplyArg,
+ };
+
+ /// Construct a list (vector) by providing a new head ("car") and a
+ /// (possibly empty) tail ("cdr").
+ ///
+ /// This terminology originates from Lisp.
+ /// It is equivalent to an `unshift` operation.
+ ConsExpr := QN_C_CONS {
+ @ {} => SugaredNir::Todo,
+ CalcExpr,
+ };
+
+ /// Retrieve the first element in a list (vector).
+ ///
+ /// This terminology originates from Lisp.
+ CarExpr := QN_C_CAR {
+ @ {
+ _label: (QN_LABEL?) => Option<N<{DescLiteral}>>,
+ } => SugaredNir::Todo,
+ CalcExpr,
+ };
+
+ /// Retrieve all but the first element of a list (vector).
+ ///
+ /// This terminology originates from Lisp,
+ /// and is pronounced "could-er".
+ /// It is also called "tail".
+ CdrExpr := QN_C_CDR {
+ @ {
+ _label: (QN_LABEL?) => Option<N<{DescLiteral}>>,
+ } => SugaredNir::Todo,
+ CalcExpr,
+ };
+
+ /// Predicate the parent expression,
+ /// producing a value of `0` if the predicate does not match.
+ ///
+ /// In expressions that do not require the use of [`WhenExpr`] as a
+ /// guard,
+ /// this is styled and interpreted as Iverson's brackets,
+ /// but there is no distinction between using [`WhenExpr`] and
+ /// multiplying by the value of the predicate;
+ /// the two forms are a matter of style.
+ ///
+ /// The exception is [`CaseExpr`],
+ /// which requires [`WhenExpr`] as part of its grammar to define
+ /// conditions for which case to evaluate.
+ WhenExpr := QN_C_WHEN {
+ @ {
+ _name: (QN_NAME) => N<{ValueIdent}>,
+ _index: (QN_INDEX?) => Option<N<{ValueIdent}>>,
+ _value: (QN_VALUE?) => Option<N<{ValueIdent}>>,
+ } => SugaredNir::Todo,
+
+ CalcPredExpr,
+ };
+
+ /// Calculation predicates.
+ ///
+ /// These predicates are used to compare two values.
+ /// They are used by [`WhenExpr`] and [`MatchExpr`].
+ CalcPredExpr := (
+ EqCalcPredExpr
+ | NeCalcPredExpr
+ | LtCalcPredExpr
+ | GtCalcPredExpr
+ | LteCalcPredExpr
+ | GteCalcPredExpr
+ );
+
+ /// Equality predicate (=).
+ EqCalcPredExpr := QN_C_EQ {
+ @ {} => SugaredNir::Todo,
+ CalcExpr,
+ };
+
+ /// Non-equality predicate (≠).
+ NeCalcPredExpr := QN_C_NE {
+ @ {} => SugaredNir::Todo,
+ CalcExpr,
+ };
+
+ /// Less-than predicate (<).
+ LtCalcPredExpr := QN_C_LT {
+ @ {} => SugaredNir::Todo,
+ CalcExpr,
+ };
+
+ /// Greater-than predicate (>).
+ GtCalcPredExpr := QN_C_GT {
+ @ {} => SugaredNir::Todo,
+ CalcExpr,
+ };
+
+ /// Less-than or equality predicate (≤).
+ LteCalcPredExpr := QN_C_LTE {
+ @ {} => SugaredNir::Todo,
+ CalcExpr,
+ };
+
+ /// Greater-than or equality predicate (≥).
+ GteCalcPredExpr := QN_C_GTE {
+ @ {} => SugaredNir::Todo,
+ CalcExpr,
+ };
+
+
+
+ /////////////////////////
+ ////////////////////////
+ ////
+ //// Map Packages
+ ////
+
+
+ /// Define a mapping from a Liza program definition into TAME
+ /// parameters.
+ ///
+ /// The coupling of this mapping is historical,
+ /// since TAME was developed to work with the Liza data collection
+ /// framework.
+ /// The mapping occurs between the bucket and TAME params.
+ ///
+ /// This will be generalized in the future.
+ ProgramMapStmt := QN_PROGRAM_MAP {
+ @ {
+ _xmlns: (QN_XMLNS) => Literal<URI_LV_PROGRAM_MAP>,
+ _xmlnslv: (QN_XMLNS_LV) => Literal<URI_LV_RATER>,
+ _src: (QN_SRC) => N<{PkgPath}>,
+ } => SugaredNir::Todo,
+
+ MapPkgImportStmt,
+ MapImportStmt,
+ MapBody,
+ };
+
+ /// Declare a mapping from TAME values into a key/value object to be
+ /// returned to the caller.
+ ///
+ /// This decouples TAME's calculations from the interface expected by
+ /// the caller.
+ /// This is also the only place where TAME is able to produce dynamic
+ /// string values.
+ ReturnMapStmt := QN_RETURN_MAP {
+ @ {
+ _xmlns: (QN_XMLNS) => Literal<URI_LV_PROGRAM_MAP>,
+ _xmlnslv: (QN_XMLNS_LV) => Literal<URI_LV_RATER>,
+ } => SugaredNir::Todo,
+
+ MapPkgImportStmt,
+ MapImportStmt,
+ MapBody,
+ };
+
+ /// Alias for [`ImportStmt`].
+ ///
+ /// This is only necessary because of [`MapImportStmt`];
+ /// both that and [`MapPkgImportStmt`] will be removed in the future
+ /// in favor of [`ImportStmt`].
+ MapPkgImportStmt := QN_LV_IMPORT {
+ @ {
+ _package: (QN_PACKAGE) => N<{PkgPath}>,
+ _export: (QN_EXPORT?) => Option<N<{BooleanLiteral}>>,
+ } => SugaredNir::Todo,
+ };
+
+ /// Import a map package.
+ ///
+ /// The distinction between this and [`ImportStmt`] is historical and is
+ /// no longer meaningful;
+ /// it will be removed in the future.
+ MapImportStmt := QN_IMPORT {
+ @ {
+ _path: (QN_PATH) => N<{PkgPath}>,
+ } => SugaredNir::Todo,
+ };
+
+ /// Define the value of a key in the destination.
+ MapBody := (MapPassStmt | MapStmt);
+
+ /// Map a value into a key of the destination without modification.
+ ///
+ /// See also [`MapStmt`] if the value needs to be modified in some way.
+ MapPassStmt := QN_PASS {
+ @ {
+ _name: (QN_NAME) => N<{AnyIdent}>,
+ _default: (QN_DEFAULT?) => Option<N<{NumLiteral}>>,
+ _scalar: (QN_SCALAR?) => Option<N<{BooleanLiteral}>>,
+ _override: (QN_OVERRIDE?) => Option<N<{BooleanLiteral}>>,
+ _novalidate: (QN_NOVALIDATE?) => Option<N<{BooleanLiteral}>>,
+ } => SugaredNir::Todo,
+ };
+
+ /// Map a value into a key of the destination.
+ ///
+ /// See also [`MapPassStmt`] if the value does not need modification.
+ MapStmt := QN_MAP {
+ @ {
+ _to: (QN_TO) => N<{AnyIdent}>,
+ _from: (QN_FROM?) => Option<N<{AnyIdent}>>,
+ // We need to be permissive in what we accept since this may
+ // match in different contexts;
+ // downstream IR will validate the value against the map
+ // destination.
+ _value: (QN_VALUE?) => Option<N<{StringLiteral}>>,
+ _default: (QN_DEFAULT?) => Option<N<{NumLiteral}>>,
+ _scalar: (QN_SCALAR?) => Option<N<{BooleanLiteral}>>,
+ _override: (QN_OVERRIDE?) => Option<N<{BooleanLiteral}>>,
+ _novalidate: (QN_NOVALIDATE?) => Option<N<{BooleanLiteral}>>,
+ } => SugaredNir::Todo,
+
+ MapStmtBody,
+ };
+
+ /// Methods for mapping a value.
+ MapStmtBody := (MapFromStmt | MapSetStmt | MapTransformStmt);
+
+ /// Source of data for a map operation.
+ MapFromStmt := QN_FROM {
+ @ {
+ _name: (QN_NAME) => N<{AnyIdent}>,
+ _default: (QN_DEFAULT?) => Option<N<{NumLiteral}>>,
+ _scalar: (QN_SCALAR?) => Option<N<{BooleanLiteral}>>,
+ _novalidate: (QN_NOVALIDATE?) => Option<N<{BooleanLiteral}>>,
+ } => SugaredNir::Todo,
+
+ MapTranslateStmt,
+ };
+
+ /// List of 1:1 value translations for a map.
+ MapTranslateStmt := QN_TRANSLATE {
+ @ {
+ _key: (QN_KEY) => N<{StringLiteral}>,
+ _value: (QN_VALUE) => N<{NumLiteral}>,
+ } => SugaredNir::Todo,
+ };
+
+ /// Yield a vector of values where each item corresponds to the
+ /// respective child expression.
+ ///
+ /// TODO: This is a misnomer,
+ /// since the result is a vector,
+ /// not a set.
+ MapSetStmt := QN_SET {
+ @ {} => SugaredNir::Todo,
+
+ MapSetBody,
+ };
+
+ /// Permitted mappings in a [`MapSetStmt`].
+ MapSetBody := (MapFromStmt | MapConstStmt);
+
+ /// Map from a constant value.
+ MapConstStmt := QN_CONST {
+ @ {
+ _value: (QN_VALUE) => N<{StringLiteral}>,
+ } => SugaredNir::Todo,
+ };
+
+ /// Transform a value using some function.
+ ///
+ /// This is currently only meaningful for string inputs,
+ /// for example to convert input string case and hash values.
+ ///
+ /// Transformations may be composed via nesting.
+ MapTransformStmt := QN_TRANSFORM {
+ @ {
+ _method: (QN_METHOD) => N<{MapTransformLiteral}>,
+ } => SugaredNir::Todo,
+
+ MapStmtBody,
+ };
+
+
+ /////////////////////////
+ ////////////////////////
+ ////
+ //// Worksheets
+ ////
+
+
+ /// Define a calculation worksheet.
+ ///
+ /// This is also referred to as a "rating worksheet" because of TAME's
+ /// history as an insurance rating system.
+ ///
+ /// A worksheet displays simplified human-readable calculations and
+ /// their results.
+ /// This is an alternative to the Summary Page,
+ /// which provides a complete view of the system and is likely far too
+ /// much information for most users.
+ ///
+ /// Calculations are rendered in the order in which they appear in this
+ /// definition.
+ WorksheetStmt := QN_WORKSHEET {
+ @ {
+ _xmlns: (QN_XMLNS) => Literal<URI_LV_WORKSHEET>,
+
+ _name: (QN_NAME) => N<{PkgPath}>,
+ _pkg: (QN_PACKAGE) => N<{PkgPath}>,
+ } => SugaredNir::Todo,
+
+ ExpandFunctionStmt,
+ DisplayStmt,
+ };
+
+ /// Render function arguments when encountered within a calculation
+ /// referenced by [`DisplayStmt`].
+ ///
+ /// If a function is not expanded,
+ /// then its application is replaced with the name of the function.
+ /// The default behavior is intended to encapsulate details of functions
+ /// that happen to be used by the system but that the user is unlikely
+ /// to care about.
+ ExpandFunctionStmt := QN_EXPAND_FUNCTION {
+ @ {
+ _name: (QN_NAME) => N<{FuncIdent}>,
+ } => SugaredNir::Todo,
+ };
+
+ /// Render a simplified, human-readable display of the calculation,
+ /// along with its result.
+ DisplayStmt := QN_DISPLAY {
+ @ {
+ _name: (QN_NAME) => N<{ValueIdent}>,
+ } => SugaredNir::Todo,
+ };
+
+
+
+ /////////////////////////
+ ////////////////////////
+ ////
+ //// Template System
+ ////
+
+
+ /// Any statement or expression that may conceivably be permitted within
+ /// the expansion context of a template.
+ ///
+ /// Since templates may be used almost anywhere,
+ /// NIR must accept any statement or expression that is valid in an
+ /// expansion context.
+ /// This must include not only the toplevel statements and expressions,
+ /// such as [`PkgBodyStmt`],
+ /// but also _inner_ statements.
+ /// For example,
+ /// consider this common pattern:
+ ///
+ /// ```xml
+ /// <c:cases>
+ /// <c:case>
+ /// <t:when-gt name="foo" value="#5" />
+ /// <c:value-of name="bar" />
+ /// </c:case>
+ ///
+ /// <!-- ... -->
+ /// </c:cases>
+ /// ```
+ ///
+ /// In the above [`CasesExpr`],
+ /// a template appears where a [`WhenExpr`] is expected,
+ /// within a [`CaseExpr`].
+ /// The template `__when-gt__` will be defined something like this:
+ ///
+ /// ```xml
+ /// <template name="__when-gt__" desc="...">
+ /// <!-- params ... -->
+ ///
+ /// <c:when name="@name@">
+ /// <c:gt>
+ /// <c:value-of name="@value@" />
+ /// </c:gt>
+ /// </c:when>
+ /// </template>
+ /// ```
+ ///
+ /// Therefore,
+ /// [`WhenExpr`] must be permitted as a direct child of
+ /// [`TemplateStmt`].
+ /// Whether or not such a thing is semantically valid depends on the
+ /// context in which the application of `__when-gt__` occurs,
+ /// which cannot be known by NIR since templates are not evaluated
+ /// at this stage;
+ /// that is the responsibility of later lowering stages.
+ AnyStmtOrExpr := (
+ PkgBodyStmt
+ // Until we fix QN_SET ambiguity, this should take precedence.
+ | InlineTemplateArgSet
+ | PkgStmtInner
+ | LogExpr
+ | CalcExpr
+ | CalcExprInner
+ );
+
+
+ /// Define a template.
+ ///
+ /// Templates are TAME's metaprogramming facility and allow for
+ /// extending the grammar of TAME.
+ /// The body of a template is expanded into its application site.
+ ///
+ /// A template may expand into multiple statements or expressions,
+ /// or even a mix of both,
+ /// with statements being hoisted automatically out of an expression
+ /// context.
+ ///
+ /// For more information on what may be contained within a template body
+ /// and the context of its expansion,
+ /// see [`AnyStmtOrExpr`].
+ ///
+ /// See also [`InlineTemplate`] for anonymous template definitions.
+ ///
+ /// Templates are applied using [`ApplyTemplate`] or [`TplApplyShort`].
+ TemplateStmt := QN_TEMPLATE {
+ @ {
+ _name: (QN_NAME) => N<{TplIdent}>,
+ _desc: (QN_DESC) => N<{DescLiteral}>,
+ } => SugaredNir::Todo,
+
+ TplHeading,
+ AnyStmtOrExpr,
+ };
+
+ /// Heading of a template definition.
+ ///
+ /// This should consist entirely of [`TplParamStmt`],
+ /// but there is also a convention of placing [`TplIf`] and
+ /// [`TplUnless`] alongside those params when they perform input
+ /// validation.
+ TplHeading := (TplParamStmt | TplIf | TplUnless);
+
+ /// Define a template parameter.
+ ///
+ /// Template parameters have the form `@name@` and represent
+ /// placeholders for expansion data.
+ /// Parameters are treated as string data during application,
+ /// but their final type depends on the context into which they are
+ /// expanded.
+ TplParamStmt := QN_PARAM {
+ @ {
+ _name: (QN_NAME) => N<{TplParamIdent}>,
+ _desc: (QN_DESC) => N<{DescLiteral}>,
+ } => SugaredNir::Todo,
+
+ TplParamDefault,
+ };
+
+ /// Define the default value for a parameter when a value is not
+ /// provided by a template application.
+ ///
+ /// When a template is applied using [`ApplyTemplate`] or
+ /// [`TplApplyShort`],
+ /// a parameter will evaluate this default expression if there is no
+ /// argument present with the same name as the parameter.
+ TplParamDefault := (
+ TplText
+ | TplParamValue
+ | TplParamInherit
+ | TplParamAdd
+ | TplParamClassToYields
+ | TplParamTypedefLookup
+ | TplParamSymValue
+ );
+
+ /// Default a parameter to a string value.
+ ///
+ /// All template params are strings until they are expanded into a
+ /// context,
+ /// so this can be used for everything from identifier generation to
+ /// providing constant values.
+ /// The result will be as if the user typed the text themselves in the
+ /// associated template application argument.
+ TplText := QN_TEXT {
+ @ {
+ _unique: (QN_UNIQUE?) => Option<N<{BooleanLiteral}>>,
+ } => SugaredNir::Todo,
+ };
+
+ /// Default the param to the value of another template param,
+ /// optionally transformed.
+ ///
+ /// This is used primarily for generating identifiers.
+ /// This list of attributes represent methods to be applied.
+ ///
+ /// This list will be refined further in TAMER,
+ /// since manipulation of values in the XSLT-based TAME was
+ /// cumbersome and slow.
+ TplParamValue := QN_PARAM_VALUE {
+ @ {
+ _name: (QN_NAME) => N<{ParamIdent}>,
+ _dash: (QN_DASH?) => Option<N<{BooleanLiteral}>>,
+ _upper: (QN_UPPER?) => Option<N<{BooleanLiteral}>>,
+ _lower: (QN_LOWER?) => Option<N<{BooleanLiteral}>>,
+ _ucfirst: (QN_UCFIRST?) => Option<N<{BooleanLiteral}>>,
+ _rmdash: (QN_RMDASH?) => Option<N<{BooleanLiteral}>>,
+ _rmunderscore: (QN_RMUNDERSCORE?) => Option<N<{BooleanLiteral}>>,
+ _identifier: (QN_IDENTIFIER?) => Option<N<{BooleanLiteral}>>,
+ _snake: (QN_SNAKE?) => Option<N<{BooleanLiteral}>>,
+ } => SugaredNir::Todo,
+ };
+
+ /// Inherit a default value from a metavalue.
+ ///
+ /// Metavalues allow templates to communicate with one-another in an
+ /// expansion environment.
+ /// They are defined using [`TplParamMeta`],
+ /// and this expression will retrieve the "closest" preceding value
+ /// from siblings and ancestors,
+ /// which is defined lexically relative to the expansion position
+ /// of the template.
+ TplParamInherit := QN_PARAM_INHERIT {
+ @ {
+ _meta: (QN_META) => N<{TplMetaIdent}>,
+ } => SugaredNir::Todo,
+ };
+
+ /// Sum a numeric value with a numeric template parameter.
+ ///
+ /// Combined with [`TplParamInherit`],
+ /// this can be used to perform bounded recursive template expansion.
+ TplParamAdd := QN_PARAM_ADD {
+ @ {
+ _name: (QN_NAME) => N<{TplParamIdent}>,
+ _value: (QN_VALUE) => N<{NumLiteral}>,
+ } => SugaredNir::Todo,
+ };
+
+ /// Look up the [`@yields`](QN_YIELDS) of a [`ClassifyStmt`].
+ ///
+ /// This allows templates to accept classification names and use them in
+ /// an expression context.
+ /// This is necessary since,
+ /// for historical reasons (accumulators),
+ /// classification names do not represent values.
+ /// Instead,
+ /// to treat a classification as a value,
+ /// its corresponding [`@yields`](QN_YIELDS) must be used.
+ ///
+ /// Every [`ClassifyStmt`] has a yields generated for it if one is not
+ /// defined,
+ /// so this will always produce some valid identifier for a
+ /// classification.
+ TplParamClassToYields := QN_PARAM_CLASS_TO_YIELDS {
+ @ {
+ _name: (QN_NAME) => N<{ClassIdent}>,
+ } => SugaredNir::Todo,
+ };
+
+ /// Given a numeric literal,
+ /// look up the associated constant item identifier within the
+ /// provided type [`@name`](QN_NAME).
+ ///
+ /// The type must have been defined using [`TypedefStmt`] and must
+ /// utilize [`EnumStmt`].
+ ///
+ /// Since all values in TAME are referentially transparent,
+ /// this has no effect at runtime.
+ /// Instead,
+ /// the purpose of this template is to allow generated code to
+ /// do two things:
+ ///
+ /// 1. Ensure that a numeric value is within the domain of a given
+ /// type at compile time; and
+ /// 2. Produce an edge to that item
+ /// (and consequently type)
+ /// in TAME's dependency graph.
+ ///
+ /// By providing an edge in the dependency graph to that item,
+ /// the graph can be used to query for what parts of the system
+ /// utilize particular values within the context of a type.
+ ///
+ /// In this sense,
+ /// this introduces a form of nominal typing,
+ /// where the type can be used as a database of values and the
+ /// dependency graph can be used as a database of references.
+ ///
+ /// For example,
+ /// in insurance,
+ /// a _class code_ is a numeric identifier representing some type of
+ /// potentially insurable risk.
+ /// By defining those class codes in types,
+ /// the system can be used to accurately report on what calculations
+ /// and classifications are affected by that class code.
+ /// Without the use of types,
+ /// querying for a constant numeric value would be ambiguous and
+ /// potentially yield false matches.
+ TplParamTypedefLookup := QN_PARAM_TYPEDEF_LOOKUP {
+ @ {
+ _name: (QN_NAME) => N<{TypeIdent}>,
+ _value: (QN_VALUE) => N<{NumLiteral}>,
+ } => SugaredNir::Todo,
+ };
+
+ /// Look up an attribute from the symbol table for a given identifier.
+ TplParamSymValue := QN_PARAM_SYM_VALUE {
+ @ {
+ _name: (QN_NAME) => N<{AnyIdent}>,
+ _value: (QN_VALUE) => N<{SymbolTableKey}>,
+ _prefix: (QN_PREFIX?) => Option<N<{AnyIdent}>>,
+ _ignore_missing: (QN_IGNORE_MISSING?) => Option<N<{BooleanLiteral}>>,
+ } => SugaredNir::Todo,
+ };
+
+ /// Keywords that trigger template expansion.
+ ///
+ /// These expressions may appear virtually anywhere in NIR,
+ /// since templates may be used to augment virtually every portion of
+ /// TAME's grammar.
+ /// The context into which a template is expanded may not be valid,
+ /// but this will not be known until templates are evaluated,
+ /// which is not the responsibility of NIR.
+ ///
+ /// Note that these are expressions in a compile-time _expansion_
+ /// context,
+ /// not a runtime calculation context as other expressions in NIR.
+ /// The result of a template expression is conceptually an XML tree,
+ /// as if the user pasted the body of the template into place and
+ /// manually replaced all parameters with their intended values.
+ /// Not all expressions yield a tree,
+ /// and some may yield multiple trees;
+ /// NIR does not know or care.
+ TplKw := (
+ ApplyTemplate
+ | TplApplyShort
+ | InlineTemplate
+ | ExpandSequence
+ | ExpandGroup
+ | ExpandBarrier
+ | TplIf
+ | TplUnless
+ | TplParamCopy
+ | TplParamMeta
+ | ErrorKw
+ | WarningKw
+ | DynNode
+ );
+
+ // TODO: This has to go away so that we can always statically lower all
+ // primitives without having to perform template expansion in order to
+ // determine what they may be.
+ DynNode := QN_DYN_NODE {
+ @ {
+ _name: (QN_NAME) => N<{DynNodeLiteral}>,
+ } => SugaredNir::Todo,
+
+ // But we can at least restrict it for now by ensuring that it's
+ // used only to contain expressions.
+ CalcExpr,
+ };
+
+ /// Produce a compiler error whose message is the expansion of the body
+ /// of this expression.
+ ///
+ /// This template yields an empty result.
+ ///
+ /// Errors will result in a compilation failure.
+ /// See also [`WarningKw`] to provide a message to the user as
+ /// compiler output without failing compilation.
+ ErrorKw := QN_ERROR {
+ @ {} => SugaredNir::Todo,
+
+ // In addition to text that is globally permitted.
+ TplParamValue,
+ };
+
+ /// Produce a compiler warning whose message is the expansion of the
+ /// body of this expression.
+ ///
+ /// This template yields an empty result.
+ ///
+ /// Warnings do not result in a compilation failure and may therefore be
+ /// missed in a sea of build output;
+ /// you should consider using [`ErrorKw`] whenever possible to
+ /// ensure that problems are immediately resolved.
+ WarningKw := QN_WARNING {
+ @ {} => SugaredNir::Todo,
+
+ // In addition to text that is globally permitted.
+ TplParamValue,
+ };
+
+ /// Long-form template application.
+ ///
+ /// This is neither a statement nor an expression as a part of this
+ /// grammar,
+ /// because this application is replaced entirely with its body
+ /// during expansion.
+ /// Further,
+ /// the template could expand into multiple statements or expressions,
+ /// or even a mix of the two
+ /// (with statements hoisted out of expressions).
+ ///
+ /// TODO: This is apparently unused by the current system,
+ /// in favor of a transition to [`TplApplyShort`],
+ /// but this is still needed to support dynamic template application
+ /// (templates whose names are derived from other template inputs).
+ ApplyTemplate := QN_APPLY_TEMPLATE {
+ @ {} => SugaredNir::Todo,
+
+ // TODO
+ };
+
+ /// Short-hand template application.
+ ///
+ /// This expands into an equivalent [`ApplyTemplate`] form where each
+ /// attribute is a template argument,
+ /// and where the body of this application is the `@values@`
+ /// template argument.
+ /// See [`ApplyTemplate`] for more information.
+ TplApplyShort := NS_T {
+ @ {} => SugaredNir::Todo,
+
+ // Streaming attribute parsing;
+ // this takes precedence over any attribute parsing above
+ // (which is used only for emitting the opening object).
+ [attr](_attr) => SugaredNir::Todo,
+
+ // Template bodies depend on context,
+ // so we have to just accept everything and defer to a future
+ // lowering operation to validate semantics.
+ AnyStmtOrExpr,
+ };
+
+ /// Define an anonymous template and immediately apply it zero or more
+ /// times.
+ ///
+ /// Inline templates allow for the definition of a template at its
+ /// expansion site,
+ /// where a re-usable named template is not necessary.
+ ///
+ /// Inline templates are also used for iterating over a list defined by
+ /// [`InlineTemplateForEach`],
+ /// and have the unique ability to perform symbol table
+ /// introspection using [`InlineTemplateSymSet`].
+ InlineTemplate := QN_INLINE_TEMPLATE {
+ @ {} => SugaredNir::Todo,
+
+ InlineTemplateForEach,
+ AnyStmtOrExpr,
+ };
+
+ /// Define a list of [`InlineTemplateArgs`] over which an inline
+ /// template will be applied.
+ ///
+ /// If there are N [`InlineTemplateArgs`],
+ /// then the body of the parent [`InlineTemplate`] will be applied
+ /// N times,
+ /// each with the respective [`InlineTemplateArgs`] set as its
+ /// arguments.
+ InlineTemplateForEach := QN_FOR_EACH {
+ @ {} => SugaredNir::Todo,
+
+ InlineTemplateArgs,
+ };
+
+ /// Inline template argument sets.
+ InlineTemplateArgs := (InlineTemplateArgSet | InlineTemplateSymSet);
+
+ /// Define an argument set for an ancestor [`InlineTemplate`]
+ /// application.
+ ///
+ /// Each key represents the name of a template parameter,
+ /// and the value represents the string value to bind to that
+ /// parameter as an argument.
+ ///
+ /// See also parent [`InlineTemplateForEach`].
+ InlineTemplateArgSet := QN_SET {
+ @ {} => SugaredNir::Todo,
+
+ // Streaming attribute parsing.
+ [attr](_attr) => SugaredNir::Todo,
+
+ // TODO: REMOVE ME
+ // (bug in `ele_parse!` requiring at least one NT in this
+ // context.)
+ CalcExpr,
+ };
+
+ /// Derive template arguments from symbol table introspection.
+ ///
+ /// This defines template arguments for the ancestor [`InlineTemplate`]
+ /// by querying the symbol table and exposing attributes associated
+ /// with that symbol.
+ ///
+ /// See also [`ExpandSequence`] to control when symbol table querying
+ /// takes place to ensure that all identifiers in the same package are
+ /// defined before querying.
+ ///
+ /// TODO: This is a really powerful feature that needs plenty of
+ /// documentation and examples.
+ InlineTemplateSymSet := QN_SYM_SET {
+ @ {
+ _name_prefix: (QN_NAME_PREFIX?) => Option<N<{StringLiteral}>>,
+ _type: (QN_TYPE?) => Option<N<{IdentType}>>,
+ // TODO: Look at XSL sources for others
+ } => SugaredNir::Todo,
+ };
+
+ /// Perform template expansion on each successive child node in order,
+ /// as if it were a separate template pass each time.
+ ///
+ /// Each child is recursively expanded before moving on to expansion of
+ /// the next child.
+ ///
+ /// The purpose of this sequence is to ensure that identifiers are
+ /// defined before templates that query the symbol table via
+    ///     [`InlineTemplateSymSet`];
+    ///       otherwise,
+    ///   it is not possible to guarantee that identifiers produced
+    ///   by template expansions in the same package are complete before
+    ///   the query takes place.
+ ///
+ /// The XSLT-based version of TAME forced a separate template pass for
+ /// each and every child in this sequence,
+ /// which is expensive;
+ /// [`ExpandGroup`] was added to help mitigate the cost of this
+ /// operation.
+ ///
+ /// TAMER hopes to remove the need for expansion sequences entirely,
+ /// since it makes complex use of the template system difficult to
+ /// understand,
+ /// and error-prone.
+ /// The concept originates from TeX's `\expandafter`, `\edef`, and
+ /// related macros.
+ ExpandSequence := QN_EXPAND_SEQUENCE {
+ @ {} => SugaredNir::Todo,
+ AnyStmtOrExpr,
+ };
+
+ /// Groups nodes to be expanded together during [`ExpandSequence`].
+ ///
+ /// This exists to work around performance pitfalls of the XSLT-based
+ /// implementation of [`ExpandSequence`];
+ /// see that NT for more information.
+ ExpandGroup := QN_EXPAND_GROUP {
+ @ {} => SugaredNir::Todo,
+ AnyStmtOrExpr,
+ };
+
+ /// Prohibit template expansion beyond this point.
+ ///
+ /// An expansion barrier is a seldom-needed feature that stops the
+ /// template system from expanding its body beyond a certain point,
+ /// which is sometimes needed for template-producing templates.
+ ExpandBarrier := QN_EXPAND_BARRIER {
+ @ {} => SugaredNir::Todo,
+ AnyStmtOrExpr,
+ };
+
+ /// Inline the value of a parameter as a tree.
+ ///
+ /// This is only useful for the special `@values@` parameter,
+ /// whose value is (conceptually) an XML tree.
+ ///
+ /// This allows creating templates that accept children.
+ TplParamCopy := QN_PARAM_COPY {
+ @ {
+ _name: (QN_NAME) => N<{TplParamIdent}>,
+ } => SugaredNir::Todo,
+ };
+
+ /// Define a metavalue at this point in the expansion environment.
+ ///
+ /// For more information on how these values are used,
+ /// see [`TplParamInherit`].
+ TplParamMeta := QN_PARAM_META {
+ @ {
+ _name: (QN_NAME) => N<{TplParamIdent}>,
+ _value: (QN_VALUE) => N<{StringLiteral}>,
+ } => SugaredNir::Todo,
+ };
+
+ /// Conditionally expand the body if the provided predicate matches.
+ TplIf := QN_IF {
+ @ {
+ _name: (QN_NAME) => N<{TplParamIdent}>,
+ _eq: (QN_EQ?) => Option<N<{StringLiteral}>>,
+ _gt: (QN_GT?) => Option<N<{NumLiteral}>>,
+ _gte: (QN_GTE?) => Option<N<{NumLiteral}>>,
+ _lt: (QN_LT?) => Option<N<{NumLiteral}>>,
+ _lte: (QN_LTE?) => Option<N<{NumLiteral}>>,
+ _prefix: (QN_PREFIX?) => Option<N<{StringLiteral}>>,
+ _suffix: (QN_SUFFIX?) => Option<N<{StringLiteral}>>,
+ } => SugaredNir::Todo,
+
+ AnyStmtOrExpr,
+ };
+
+ /// Conditionally expand the body if the provided predicate does not
+ /// match.
+ ///
+ /// This can be used as a sibling of [`TplIf`] to create the equivalent
+ /// of an `else` clause.
+ TplUnless := QN_UNLESS {
+ @ {
+ _name: (QN_NAME) => N<{TplParamIdent}>,
+ _eq: (QN_EQ?) => Option<N<{StringLiteral}>>,
+ _gt: (QN_GT?) => Option<N<{NumLiteral}>>,
+ _gte: (QN_GTE?) => Option<N<{NumLiteral}>>,
+ _lt: (QN_LT?) => Option<N<{NumLiteral}>>,
+ _lte: (QN_LTE?) => Option<N<{NumLiteral}>>,
+ _prefix: (QN_PREFIX?) => Option<N<{StringLiteral}>>,
+ _suffix: (QN_SUFFIX?) => Option<N<{StringLiteral}>>,
+ } => SugaredNir::Todo,
+
+ AnyStmtOrExpr,
+ };
+}
diff --git a/tamer/src/obj/xmlo/air.rs b/tamer/src/obj/xmlo/air.rs
index dedc093..b7b63ae 100644
--- a/tamer/src/obj/xmlo/air.rs
+++ b/tamer/src/obj/xmlo/air.rs
@@ -234,7 +234,7 @@ impl ParseState for XmloToAir {
}
}
- fn is_accepting(&self) -> bool {
+ fn is_accepting(&self, _: &Self::Context) -> bool {
matches!(*self, Self::Done(_))
}
}
@@ -399,17 +399,12 @@ mod test {
num::{Dim, Dtype},
obj::xmlo::{SymAttrs, SymType},
parse::Parsed,
- span::{DUMMY_SPAN, UNKNOWN_SPAN},
+ span::{dummy::*, UNKNOWN_SPAN},
sym::GlobalSymbolIntern,
};
type Sut = XmloToAir;
- const S1: Span = DUMMY_SPAN;
- const S2: Span = S1.offset_add(1).unwrap();
- const S3: Span = S2.offset_add(1).unwrap();
- const S4: Span = S3.offset_add(1).unwrap();
-
#[test]
fn data_from_package_event() {
let name = "name".into();
@@ -428,7 +423,7 @@ mod test {
assert_eq!(Some(Ok(Parsed::Incomplete)), sut.next()); // PkgRootPath
assert_eq!(Some(Ok(Parsed::Incomplete)), sut.next()); // Eoh
- let ctx = sut.finalize().unwrap();
+ let ctx = sut.finalize().unwrap().into_context();
assert_eq!(Some(name), ctx.prog_name);
assert_eq!(Some(relroot), ctx.relroot);
@@ -515,7 +510,7 @@ mod test {
assert_eq!(Some(Ok(Parsed::Incomplete)), sut.next()); // SymDecl (@src)
assert_eq!(Some(Ok(Parsed::Incomplete)), sut.next()); // Eoh
- let ctx = sut.finalize().unwrap();
+ let ctx = sut.finalize().unwrap().into_context();
let mut founds = ctx.found.unwrap().into_iter().collect::<Vec<_>>();
// Just to remove nondeterminism in case the iteration order happens
@@ -626,7 +621,7 @@ mod test {
);
assert_eq!(Some(Ok(Parsed::Incomplete)), sut.next()); // Eoh
- let ctx = sut.finalize().unwrap();
+ let ctx = sut.finalize().unwrap().into_context();
// Both above symbols were local (no `src`),
// but note that we don't care if it's None or initialized with a
diff --git a/tamer/src/obj/xmlo/error.rs b/tamer/src/obj/xmlo/error.rs
index 9620ed6..e346565 100644
--- a/tamer/src/obj/xmlo/error.rs
+++ b/tamer/src/obj/xmlo/error.rs
@@ -23,7 +23,7 @@ use crate::diagnose::{Annotate, AnnotatedSpan, Diagnostic};
use crate::parse::Token;
use crate::span::Span;
use crate::sym::SymbolId;
-use crate::xir::flat::XirfToken;
+use crate::xir::flat::{Text, XirfToken};
use std::fmt::Display;
/// Error during `xmlo` processing.
@@ -38,7 +38,7 @@ use std::fmt::Display;
#[derive(Debug, PartialEq, Eq)]
pub enum XmloError {
/// The root node was not an `lv:package`.
- UnexpectedRoot(XirfToken),
+ UnexpectedRoot(XirfToken<Text>),
/// A `preproc:sym` node was found, but is missing `@name`.
UnassociatedSym(Span),
/// The provided `preproc:sym/@type` is unknown or invalid.
@@ -65,7 +65,7 @@ pub enum XmloError {
/// Ideally we would provide a better error depending on the context,
/// but this serves as a fallback if the input is completely
/// unexpected.
- UnexpectedToken(XirfToken),
+ UnexpectedToken(XirfToken<Text>),
}
impl Display for XmloError {
diff --git a/tamer/src/obj/xmlo/reader.rs b/tamer/src/obj/xmlo/reader.rs
index f4288e3..74f8240 100644
--- a/tamer/src/obj/xmlo/reader.rs
+++ b/tamer/src/obj/xmlo/reader.rs
@@ -24,14 +24,14 @@ use crate::{
num::{Dim, Dtype},
obj::xmlo::SymType,
parse::{
- self, EmptyContext, NoContext, ParseState, Token, Transition,
- TransitionResult, Transitionable,
+ self, ClosedParseState, EmptyContext, NoContext, ParseState, Token,
+ Transition, TransitionResult, Transitionable,
},
span::Span,
sym::{st::raw, SymbolId},
xir::{
attr::{Attr, AttrSpan},
- flat::XirfToken as Xirf,
+ flat::{Text, XirfToken as Xirf},
st::qname::*,
EleSpan, QName,
},
@@ -92,6 +92,10 @@ pub enum XmloToken {
impl parse::Object for XmloToken {}
impl Token for XmloToken {
+ fn ir_name() -> &'static str {
+ "xmlo"
+ }
+
fn span(&self) -> Span {
use XmloToken::*;
@@ -136,11 +140,12 @@ impl Display for XmloToken {
}
/// A parser capable of being composed with [`XmloReader`].
-pub trait XmloState = ParseState<Token = Xirf, Context = EmptyContext>
-where
- Self: Default,
- <Self as ParseState>::Error: Into<XmloError>,
- <Self as ParseState>::Object: Into<XmloToken>;
+pub trait XmloState =
+ ClosedParseState<Token = Xirf<Text>, Context = EmptyContext>
+ where
+ Self: Default,
+ <Self as ParseState>::Error: Into<XmloError>,
+ <Self as ParseState>::Object: Into<XmloToken>;
#[derive(Debug, Default, PartialEq, Eq)]
pub enum XmloReader<
@@ -172,7 +177,7 @@ pub enum XmloReader<
impl<SS: XmloState, SD: XmloState, SF: XmloState> ParseState
for XmloReader<SS, SD, SF>
{
- type Token = Xirf;
+ type Token = Xirf<Text>;
type Object = XmloToken;
type Error = XmloError;
@@ -200,7 +205,7 @@ impl<SS: XmloState, SD: XmloState, SF: XmloState> ParseState
QN_NAME => XmloToken::PkgName(value, aspan.1),
QN_UUROOTPATH => XmloToken::PkgRootPath(value, aspan.1),
QN_PROGRAM => XmloToken::PkgProgramFlag(aspan.0), // yes 0
- QN_ELIG_CLASS_YIELDS => {
+ QN_P_ELIG_CLASS_YIELDS => {
XmloToken::PkgEligClassYields(value, aspan.1)
}
// Ignore unknown attributes for now to maintain BC,
@@ -213,13 +218,13 @@ impl<SS: XmloState, SD: XmloState, SF: XmloState> ParseState
// XIRF guarantees a matching closing tag.
(Package(_), Xirf::Close(..)) => Transition(Done).incomplete(),
- (Package(_), Xirf::Open(QN_SYMTABLE, span, ..)) => {
+ (Package(_), Xirf::Open(QN_P_SYMTABLE, span, ..)) => {
Transition(Symtable(span.tag_span(), SS::default()))
.incomplete()
}
- (Symtable(_, ss), Xirf::Close(Some(QN_SYMTABLE), ..))
- if ss.is_accepting() =>
+ (Symtable(_, ss), Xirf::Close(Some(QN_P_SYMTABLE), ..))
+ if ss.is_accepting(ctx) =>
{
Transition(SymDepsExpected).incomplete()
}
@@ -233,12 +238,12 @@ impl<SS: XmloState, SD: XmloState, SF: XmloState> ParseState
|| unreachable!(), // TODO: currently caught by preceding match
),
- (SymDepsExpected, Xirf::Open(QN_SYM_DEPS, span, _)) => {
+ (SymDepsExpected, Xirf::Open(QN_P_SYM_DEPS, span, _)) => {
Transition(SymDeps(span.tag_span(), SD::default())).incomplete()
}
- (SymDeps(_, sd), Xirf::Close(None | Some(QN_SYM_DEPS), ..))
- if sd.is_accepting() =>
+ (SymDeps(_, sd), Xirf::Close(None | Some(QN_P_SYM_DEPS), ..))
+ if sd.is_accepting(ctx) =>
{
Transition(FragmentsExpected).incomplete()
}
@@ -250,15 +255,15 @@ impl<SS: XmloState, SD: XmloState, SF: XmloState> ParseState
|| unreachable!(), // TODO: currently caught by preceding match
),
- (FragmentsExpected, Xirf::Open(QN_FRAGMENTS, span, _)) => {
+ (FragmentsExpected, Xirf::Open(QN_P_FRAGMENTS, span, _)) => {
Transition(Fragments(span.tag_span(), SF::default()))
.incomplete()
}
(
Fragments(_, sf),
- Xirf::Close(None | Some(QN_FRAGMENTS), span, _),
- ) if sf.is_accepting() => {
+ Xirf::Close(None | Some(QN_P_FRAGMENTS), span, _),
+ ) if sf.is_accepting(ctx) => {
Transition(Eoh).ok(XmloToken::Eoh(span.tag_span()))
}
@@ -282,7 +287,7 @@ impl<SS: XmloState, SD: XmloState, SF: XmloState> ParseState
}
}
- fn is_accepting(&self) -> bool {
+ fn is_accepting(&self, _: &Self::Context) -> bool {
*self == Self::Eoh || *self == Self::Done
}
}
@@ -329,7 +334,7 @@ pub enum SymtableState {
impl parse::Object for (SymbolId, SymAttrs, Span) {}
impl ParseState for SymtableState {
- type Token = Xirf;
+ type Token = Xirf<Text>;
type Object = (SymbolId, SymAttrs, Span);
type Error = XmloError;
@@ -343,7 +348,7 @@ impl ParseState for SymtableState {
match (self, tok) {
(Ready, Xirf::Attr(..)) => Transition(Ready).incomplete(),
- (Ready, Xirf::Open(QN_SYM, span, _)) => {
+ (Ready, Xirf::Open(QN_P_SYM, span, _)) => {
Transition(Sym(span.tag_span(), None, SymAttrs::default()))
.incomplete()
}
@@ -374,7 +379,7 @@ impl ParseState for SymtableState {
// attribute now so we can simplify parsing here.
(
Sym(span_sym, Some(name), attrs),
- Xirf::Open(QN_FROM, span_from, _),
+ Xirf::Open(QN_P_FROM, span_from, _),
) if attrs.ty == Some(SymType::Map)
|| attrs.ty == Some(SymType::RetMap) =>
{
@@ -412,7 +417,7 @@ impl ParseState for SymtableState {
// The old XmloReader ignored these.
(
Sym(span_sym, Some(name), attrs),
- Xirf::Open(QN_SYM_REF, span_ref, _),
+ Xirf::Open(QN_P_SYM_REF, span_ref, _),
) => Transition(SymRef(span_sym, name, attrs, span_ref.tag_span()))
.incomplete(),
@@ -431,7 +436,7 @@ impl ParseState for SymtableState {
}
}
- fn is_accepting(&self) -> bool {
+ fn is_accepting(&self, _: &Self::Context) -> bool {
*self == Self::Ready
}
}
@@ -491,7 +496,7 @@ impl SymtableState {
QN_ISOVERRIDE => {
attrs.override_ = value == L_TRUE;
}
- QN_GENERATED => {
+ QN_P_GENERATED => {
attrs.generated = value == L_TRUE;
}
@@ -615,7 +620,7 @@ pub enum SymDepsState {
}
impl ParseState for SymDepsState {
- type Token = Xirf;
+ type Token = Xirf<Text>;
type Object = XmloToken;
type Error = XmloError;
@@ -629,7 +634,7 @@ impl ParseState for SymDepsState {
match (self, tok) {
(Ready, Xirf::Attr(..)) => Transition(Ready).incomplete(),
- (Ready, Xirf::Open(QN_SYM_DEP, span, _)) => {
+ (Ready, Xirf::Open(QN_P_SYM_DEP, span, _)) => {
Transition(SymUnnamed(span.tag_span())).incomplete()
}
@@ -641,7 +646,7 @@ impl ParseState for SymDepsState {
(SymUnnamed(span), _) => Transition(SymUnnamed(span))
.err(XmloError::UnassociatedSymDep(span)),
- (Sym(span, name), Xirf::Open(QN_SYM_REF, span_ref, _)) => {
+ (Sym(span, name), Xirf::Open(QN_P_SYM_REF, span_ref, _)) => {
Transition(SymRefUnnamed(span, name, span_ref.tag_span()))
.incomplete()
}
@@ -682,7 +687,7 @@ impl ParseState for SymDepsState {
}
}
- fn is_accepting(&self) -> bool {
+ fn is_accepting(&self, _: &Self::Context) -> bool {
*self == Self::Ready
}
}
@@ -726,7 +731,7 @@ pub enum FragmentsState {
}
impl ParseState for FragmentsState {
- type Token = Xirf;
+ type Token = Xirf<Text>;
type Object = XmloToken;
type Error = XmloError;
@@ -740,7 +745,7 @@ impl ParseState for FragmentsState {
match (self, tok) {
(Ready, Xirf::Attr(..)) => Transition(Ready).incomplete(),
- (Ready, Xirf::Open(QN_FRAGMENT, span, _)) => {
+ (Ready, Xirf::Open(QN_P_FRAGMENT, span, _)) => {
Transition(FragmentUnnamed(span.tag_span())).incomplete()
}
@@ -771,7 +776,7 @@ impl ParseState for FragmentsState {
(FragmentUnnamed(span), _) => Transition(FragmentUnnamed(span))
.err(XmloError::UnassociatedFragment(span)),
- (Fragment(span, id), Xirf::Text(text, _)) => {
+ (Fragment(span, id), Xirf::Text(Text(text, _), _)) => {
Transition(FragmentDone(span, id))
.ok(XmloToken::Fragment(id, text, span))
}
@@ -793,7 +798,7 @@ impl ParseState for FragmentsState {
}
}
- fn is_accepting(&self) -> bool {
+ fn is_accepting(&self, _: &Self::Context) -> bool {
*self == Self::Ready
}
}
diff --git a/tamer/src/obj/xmlo/reader/test.rs b/tamer/src/obj/xmlo/reader/test.rs
index b71b1f4..d49db11 100644
--- a/tamer/src/obj/xmlo/reader/test.rs
+++ b/tamer/src/obj/xmlo/reader/test.rs
@@ -25,7 +25,7 @@ use crate::{
num::Dtype,
obj::xmlo::SymType,
parse::{ParseError, ParseState, Parsed},
- span::{Span, DUMMY_SPAN},
+ span::{dummy::*, Span},
sym::GlobalSymbolIntern,
xir::{
attr::Attr,
@@ -37,12 +37,6 @@ use crate::{
},
};
-const S1: Span = DUMMY_SPAN;
-const S2: Span = S1.offset_add(1).unwrap();
-const S3: Span = S2.offset_add(1).unwrap();
-const S4: Span = S3.offset_add(1).unwrap();
-const S5: Span = S4.offset_add(1).unwrap();
-
type Sut = XmloReader;
#[test]
@@ -142,9 +136,9 @@ fn ignores_unknown_package_attr() {
#[test]
fn symtable_err_missing_sym_name() {
let toks = [
- open(QN_SYM, S1, Depth(0)),
+ open(QN_P_SYM, S1, Depth(0)),
// No attributes, but importantly, no name.
- close(Some(QN_SYMTABLE), S2, Depth(0)),
+ close(Some(QN_P_SYMTABLE), S2, Depth(0)),
]
.into_iter();
@@ -171,7 +165,7 @@ macro_rules! symtable_tests {
let name = stringify!($name).intern();
let toks = [
- open(QN_SYM, SSYM, Depth(0)),
+ open(QN_P_SYM, SSYM, Depth(0)),
Xirf::Attr(Attr(QN_NAME, name, AttrSpan(S2, S3))),
$(
Xirf::Attr(Attr(
@@ -180,7 +174,7 @@ macro_rules! symtable_tests {
AttrSpan(S3, SATTRVAL)
)),
)*
- close(Some(QN_SYM), S2, Depth(0)),
+ close(Some(QN_P_SYM), S2, Depth(0)),
]
.into_iter();
@@ -330,14 +324,14 @@ fn symtable_sym_generated_true() {
let name = "generated_true".into();
let toks = [
- open(QN_SYM, SSYM, Depth(0)),
+ open(QN_P_SYM, SSYM, Depth(0)),
Xirf::Attr(Attr(QN_NAME, name, AttrSpan(S2, S3))),
Xirf::Attr(Attr(
("preproc", "generated").unwrap_into(),
raw::L_TRUE,
AttrSpan(S3, S4),
)),
- close(Some(QN_SYM), S2, Depth(0)),
+ close(Some(QN_P_SYM), S2, Depth(0)),
]
.into_iter();
@@ -365,15 +359,15 @@ fn symtable_map_from() {
let map_from = "from-a".into();
let toks = [
- open(QN_SYM, SSYM, Depth(0)),
+ open(QN_P_SYM, SSYM, Depth(0)),
Xirf::Attr(Attr(QN_NAME, name, AttrSpan(S2, S3))),
Xirf::Attr(Attr(QN_TYPE, raw::L_MAP, AttrSpan(S3, S4))),
// <preproc:from>
- open(QN_FROM, S2, Depth(1)),
+ open(QN_P_FROM, S2, Depth(1)),
Xirf::Attr(Attr(QN_NAME, map_from, AttrSpan(S2, S3))),
close_empty(S4, Depth(1)),
// />
- close(Some(QN_SYM), S2, Depth(0)),
+ close(Some(QN_P_SYM), S2, Depth(0)),
]
.into_iter();
@@ -402,15 +396,15 @@ fn symtable_map_from_missing_name() {
let name = "sym-map-from-missing".into();
let toks = [
- open(QN_SYM, SSYM, Depth(0)),
+ open(QN_P_SYM, SSYM, Depth(0)),
Xirf::Attr(Attr(QN_NAME, name, AttrSpan(S2, S3))),
Xirf::Attr(Attr(QN_TYPE, raw::L_MAP, AttrSpan(S3, S4))),
// <preproc:from>
- open(QN_FROM, S2, Depth(1)),
+ open(QN_P_FROM, S2, Depth(1)),
// @name missing
close_empty(S4, Depth(1)),
// />
- close(Some(QN_SYM), S2, Depth(0)),
+ close(Some(QN_P_SYM), S2, Depth(0)),
]
.into_iter();
@@ -427,20 +421,20 @@ fn symtable_map_from_multiple() {
let name = "sym-map-from-missing".into();
let toks = [
- open(QN_SYM, SSYM, Depth(0)),
+ open(QN_P_SYM, SSYM, Depth(0)),
Xirf::Attr(Attr(QN_NAME, name, AttrSpan(S2, S3))),
Xirf::Attr(Attr(QN_TYPE, raw::L_MAP, AttrSpan(S3, S4))),
// <preproc:from>
- open(QN_FROM, S2, Depth(1)),
+ open(QN_P_FROM, S2, Depth(1)),
Xirf::Attr(Attr(QN_NAME, "ok".into(), AttrSpan(S2, S3))),
close_empty(S4, Depth(1)),
// />
// <preproc:from> again (err)
- open(QN_FROM, S3, Depth(1)),
+ open(QN_P_FROM, S3, Depth(1)),
Xirf::Attr(Attr(QN_NAME, "bad".into(), AttrSpan(S2, S3))),
close_empty(S4, Depth(1)),
// />
- close(Some(QN_SYM), S2, Depth(0)),
+ close(Some(QN_P_SYM), S2, Depth(0)),
]
.into_iter();
@@ -458,19 +452,19 @@ fn sym_dep_event() {
let dep2 = "dep2".into();
let toks = [
- open(QN_SYM_DEP, S1, Depth(0)),
+ open(QN_P_SYM_DEP, S1, Depth(0)),
Xirf::Attr(Attr(QN_NAME, name, AttrSpan(S2, S3))),
// <preproc:sym-ref
- open(QN_SYM_REF, S2, Depth(1)),
+ open(QN_P_SYM_REF, S2, Depth(1)),
Xirf::Attr(Attr(QN_NAME, dep1, AttrSpan(S3, S4))),
close_empty(S4, Depth(1)),
// />
// <preproc:sym-ref
- open(QN_SYM_REF, S3, Depth(1)),
+ open(QN_P_SYM_REF, S3, Depth(1)),
Xirf::Attr(Attr(QN_NAME, dep2, AttrSpan(S4, S5))),
close_empty(S4, Depth(1)),
// />
- close(Some(QN_SYM_DEP), S5, Depth(0)),
+ close(Some(QN_P_SYM_DEP), S5, Depth(0)),
]
.into_iter();
@@ -493,9 +487,9 @@ fn sym_dep_event() {
#[test]
fn sym_dep_missing_name() {
let toks = [
- open(QN_SYM_DEP, S1, Depth(0)),
+ open(QN_P_SYM_DEP, S1, Depth(0)),
// missing @name, causes error
- open(QN_SYM_REF, S2, Depth(1)),
+ open(QN_P_SYM_REF, S2, Depth(1)),
]
.into_iter();
@@ -511,9 +505,9 @@ fn sym_ref_missing_name() {
let name = "depsym".into();
let toks = [
- open(QN_SYM_DEP, S1, Depth(0)),
+ open(QN_P_SYM_DEP, S1, Depth(0)),
Xirf::Attr(Attr(QN_NAME, name, AttrSpan(S2, S3))),
- open(QN_SYM_REF, S2, Depth(1)),
+ open(QN_P_SYM_REF, S2, Depth(1)),
// missing @name, causes error
close_empty(S3, Depth(1)),
]
@@ -535,15 +529,15 @@ fn sym_fragment_event() {
let toks = [
// first
- open(QN_FRAGMENT, S1, Depth(0)),
+ open(QN_P_FRAGMENT, S1, Depth(0)),
Xirf::Attr(Attr(QN_ID, id1, AttrSpan(S2, S3))),
- Xirf::Text(frag1, S4),
- close(Some(QN_FRAGMENT), S5, Depth(0)),
+ Xirf::Text(Text(frag1, S4), Depth(1)),
+ close(Some(QN_P_FRAGMENT), S5, Depth(0)),
// second
- open(QN_FRAGMENT, S2, Depth(0)),
+ open(QN_P_FRAGMENT, S2, Depth(0)),
Xirf::Attr(Attr(QN_ID, id2, AttrSpan(S3, S4))),
- Xirf::Text(frag2, S5),
- close(Some(QN_FRAGMENT), S5, Depth(0)),
+ Xirf::Text(Text(frag2, S5), Depth(1)),
+ close(Some(QN_P_FRAGMENT), S5, Depth(0)),
]
.into_iter();
@@ -565,9 +559,9 @@ fn sym_fragment_event() {
#[test]
fn sym_fragment_missing_id() {
let toks = [
- open(QN_FRAGMENT, S1, Depth(0)),
+ open(QN_P_FRAGMENT, S1, Depth(0)),
// missing @id
- Xirf::Text("text".into(), S4),
+ Xirf::Text(Text("text".into(), S4), Depth(1)),
]
.into_iter();
@@ -582,10 +576,10 @@ fn sym_fragment_missing_id() {
#[test]
fn sym_fragment_empty_id() {
let toks = [
- open(QN_FRAGMENT, S1, Depth(0)),
+ open(QN_P_FRAGMENT, S1, Depth(0)),
// empty @id
Xirf::Attr(Attr(QN_ID, "".into(), AttrSpan(S3, S4))),
- Xirf::Text("text".into(), S4),
+ Xirf::Text(Text("text".into(), S4), Depth(1)),
]
.into_iter();
@@ -603,10 +597,10 @@ fn _sym_fragment_missing_text() {
let id = "fragsym".into();
let toks = [
- open(QN_FRAGMENT, S1, Depth(0)),
+ open(QN_P_FRAGMENT, S1, Depth(0)),
Xirf::Attr(Attr(QN_ID, id, AttrSpan(S3, S4))),
// missing text
- close(Some(QN_FRAGMENT), S5, Depth(0)),
+ close(Some(QN_P_FRAGMENT), S5, Depth(0)),
]
.into_iter();
@@ -633,35 +627,35 @@ fn xmlo_composite_parsers_header() {
let toks_header = [
open(QN_PACKAGE, S1, Depth(0)),
// <preproc:symtable>
- open(QN_SYMTABLE, S2, Depth(1)),
+ open(QN_P_SYMTABLE, S2, Depth(1)),
// <preproc:sym
- open(QN_SYM, S3, Depth(2)),
+ open(QN_P_SYM, S3, Depth(2)),
Xirf::Attr(Attr(QN_NAME, sym_name, AttrSpan(S2, S3))),
close_empty(S4, Depth(2)),
// />
- close(Some(QN_SYMTABLE), S4, Depth(1)),
+ close(Some(QN_P_SYMTABLE), S4, Depth(1)),
// </preproc:symtable>
// <preproc:sym-deps>
- open(QN_SYM_DEPS, S2, Depth(1)),
+ open(QN_P_SYM_DEPS, S2, Depth(1)),
// <preproc:sym-dep
- open(QN_SYM_DEP, S3, Depth(3)),
+ open(QN_P_SYM_DEP, S3, Depth(3)),
Xirf::Attr(Attr(QN_NAME, symdep_name, AttrSpan(S2, S3))),
- close(Some(QN_SYM_DEP), S4, Depth(3)),
+ close(Some(QN_P_SYM_DEP), S4, Depth(3)),
// </preproc:sym-dep>
- close(Some(QN_SYM_DEPS), S3, Depth(1)),
+ close(Some(QN_P_SYM_DEPS), S3, Depth(1)),
// </preproc:sym-deps>
// <preproc:fragments>
- open(QN_FRAGMENTS, S2, Depth(1)),
+ open(QN_P_FRAGMENTS, S2, Depth(1)),
// <preproc:fragment
- open(QN_FRAGMENT, S4, Depth(2)),
+ open(QN_P_FRAGMENT, S4, Depth(2)),
Xirf::Attr(Attr(QN_ID, symfrag_id, AttrSpan(S2, S3))),
- Xirf::Text(frag, S5),
- close(Some(QN_FRAGMENT), S4, Depth(2)),
+ Xirf::Text(Text(frag, S5), Depth(3)),
+ close(Some(QN_P_FRAGMENT), S4, Depth(2)),
// </preproc:fragment>
- close(Some(QN_FRAGMENTS), S3, Depth(1)),
+ close(Some(QN_P_FRAGMENTS), S3, Depth(1)),
// </preproc:fragments>
// No closing root node:
- // ensure that we can just end at the header without parsing further.
+        //   ensure that we can just end at the header without parsing further.
]
.into_iter();
diff --git a/tamer/src/parse.rs b/tamer/src/parse.rs
index 58f5c33..fe148cb 100644
--- a/tamer/src/parse.rs
+++ b/tamer/src/parse.rs
@@ -25,27 +25,46 @@ mod error;
mod lower;
mod parser;
mod state;
+mod trace;
-pub use error::ParseError;
+pub mod util;
+
+pub use error::{FinalizeError, ParseError};
pub use lower::{Lower, LowerIter, ParsedObject};
-pub use parser::{Parsed, ParsedResult, Parser};
+pub use parser::{FinalizedParser, Parsed, ParsedResult, Parser};
pub use state::{
context::{Context, Empty as EmptyContext, NoContext},
- ParseResult, ParseState, ParseStatus, Transition, TransitionResult,
- Transitionable,
+ ClosedParseState, ParseResult, ParseState, ParseStatus, Transition,
+ TransitionResult, Transitionable,
};
-use crate::span::{Span, DUMMY_SPAN};
+use crate::span::{Span, UNKNOWN_SPAN};
use std::{
error::Error,
fmt::{Debug, Display},
};
+/// Prelude for TAME's parsing framework.
+///
+/// This contains the boilerplate types necessary for virtually every
+/// parser.
+pub mod prelude {
+ pub use super::{
+ Context, Object, ParseState, Token, Transition, TransitionResult,
+ };
+}
+
/// A single datum from a streaming IR with an associated [`Span`].
///
/// A token may be a lexeme with associated data,
/// or a more structured object having been lowered from other IRs.
pub trait Token: Display + Debug + PartialEq {
+ /// Name of the intermediate representation (IR) this token represents.
+ ///
+ /// This is used for diagnostic information,
+ /// primarily for debugging TAMER itself.
+ fn ir_name() -> &'static str;
+
/// Retrieve the [`Span`] representing the source location of the token.
fn span(&self) -> Span;
}
@@ -66,7 +85,11 @@ pub struct UnknownToken;
impl Token for UnknownToken {
fn span(&self) -> Span {
- DUMMY_SPAN
+ UNKNOWN_SPAN
+ }
+
+ fn ir_name() -> &'static str {
+ "<UNKNOWN IR>"
}
}
@@ -110,7 +133,7 @@ pub mod test {
use super::*;
use crate::{
diagnose::{AnnotatedSpan, Diagnostic},
- span::{DUMMY_SPAN as DS, UNKNOWN_SPAN},
+ span::{dummy::DUMMY_SPAN as DS, UNKNOWN_SPAN},
sym::GlobalSymbolIntern,
};
use std::{assert_matches::assert_matches, iter::once};
@@ -124,12 +147,16 @@ pub mod test {
}
impl Display for TestToken {
- fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
- unimplemented!("fmt::Display")
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ write!(f, "(test token)")
}
}
impl Token for TestToken {
+ fn ir_name() -> &'static str {
+ "<PARSE TEST IR>"
+ }
+
fn span(&self) -> Span {
use TestToken::*;
match self {
@@ -183,7 +210,7 @@ pub mod test {
}
}
- fn is_accepting(&self) -> bool {
+ fn is_accepting(&self, _: &Self::Context) -> bool {
*self == Self::Done
}
}
@@ -200,8 +227,8 @@ pub mod test {
}
impl Display for EchoStateError {
- fn fmt(&self, _: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
- unimplemented!()
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ write!(f, "test EchoStateError")
}
}
@@ -213,7 +240,7 @@ pub mod test {
impl Diagnostic for EchoStateError {
fn describe(&self) -> Vec<AnnotatedSpan> {
- unimplemented!()
+ vec![]
}
}
@@ -256,11 +283,13 @@ pub mod test {
// state,
// we must fail when we encounter the end of the stream.
assert_eq!(
- Some(Err(ParseError::UnexpectedEof(
- span.endpoints().1.unwrap(),
- // All the states have the same string
- // (at time of writing).
- EchoState::default().to_string(),
+ Some(Err(ParseError::FinalizeError(
+ FinalizeError::UnexpectedEof(
+ span.endpoints().1.unwrap(),
+ // All the states have the same string
+ // (at time of writing).
+ EchoState::default().to_string(),
+ )
))),
sut.next()
);
@@ -316,7 +345,8 @@ pub mod test {
let result = sut.finalize();
assert_matches!(
result,
- Err((_, ParseError::UnexpectedEof(s, _))) if s == span.endpoints().1.unwrap()
+ Err((_, FinalizeError::UnexpectedEof(s, _)))
+ if s == span.endpoints().1.unwrap()
);
// The sut should have been re-returned,
@@ -361,7 +391,7 @@ pub mod test {
let mut toks = vec![TestToken::MarkDone(DS)].into_iter();
let mut sut = Sut::from(&mut toks);
sut.next().unwrap().unwrap();
- let ctx = sut.finalize().unwrap();
+ let ctx = sut.finalize().unwrap().into_context();
assert_eq!(ctx, Default::default());
// Next, verify that the context that is manipulated is the context
@@ -370,7 +400,7 @@ pub mod test {
let mut toks = vec![TestToken::SetCtxVal(5)].into_iter();
let mut sut = Sut::from(&mut toks);
sut.next().unwrap().unwrap();
- let ctx = sut.finalize().unwrap();
+ let ctx = sut.finalize().unwrap().into_context();
assert_eq!(ctx, StubContext { val });
// Finally, verify that the context provided is the context that is
@@ -380,7 +410,241 @@ pub mod test {
let mut toks = vec![TestToken::MarkDone(DS)].into_iter();
let mut sut = EchoState::parse_with_context(&mut toks, given_ctx);
sut.next().unwrap().unwrap();
- let ctx = sut.finalize().unwrap();
+ let ctx = sut.finalize().unwrap().into_context();
assert_eq!(ctx, StubContext { val });
}
+
+ // This healthy block of mostly-boilerplate verifies that the practical
+ // use case of the trampoline system actually type-checks,
+    //     and was used during development as a simpler example than having
+    //       to contend with the mammoth `ele_parse!`.
+ // There is no runtime test;
+ // it will fail to compile if there's a problem.
+ mod superst {
+ use crate::span::dummy::S1;
+
+ use super::*;
+
+ #[derive(Debug, PartialEq, Eq)]
+ enum Sup {
+ SubA(SubA),
+ SubB(SubB),
+ }
+
+ #[derive(Debug, PartialEq, Eq)]
+ enum SubA {
+ A,
+ }
+
+ #[derive(Debug, PartialEq, Eq)]
+ enum SubB {
+ B,
+ }
+
+ impl Display for Sup {
+ fn fmt(&self, _f: &mut std::fmt::Formatter) -> std::fmt::Result {
+ unimplemented!()
+ }
+ }
+
+ impl Display for SubA {
+ fn fmt(&self, _f: &mut std::fmt::Formatter) -> std::fmt::Result {
+ unimplemented!()
+ }
+ }
+
+ impl Display for SubB {
+ fn fmt(&self, _f: &mut std::fmt::Formatter) -> std::fmt::Result {
+ unimplemented!()
+ }
+ }
+
+ impl From<SubA> for Sup {
+ fn from(sub: SubA) -> Self {
+ Self::SubA(sub)
+ }
+ }
+
+ impl From<SubB> for Sup {
+ fn from(sub: SubB) -> Self {
+ Self::SubB(sub)
+ }
+ }
+
+ #[derive(Debug, PartialEq)]
+ enum SupError {
+ SubA(SubAError),
+ SubB(SubBError),
+ }
+ #[derive(Debug, PartialEq)]
+ enum SubAError {}
+ #[derive(Debug, PartialEq)]
+ enum SubBError {}
+
+ impl Error for SupError {
+ fn source(&self) -> Option<&(dyn Error + 'static)> {
+ None
+ }
+ }
+
+ impl Display for SupError {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+ write!(f, "SupError")
+ }
+ }
+
+ impl Diagnostic for SupError {
+ fn describe(&self) -> Vec<AnnotatedSpan> {
+ vec![]
+ }
+ }
+
+ impl Error for SubAError {
+ fn source(&self) -> Option<&(dyn Error + 'static)> {
+ None
+ }
+ }
+
+ impl Display for SubAError {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+ write!(f, "SubAError")
+ }
+ }
+
+ impl Diagnostic for SubAError {
+ fn describe(&self) -> Vec<AnnotatedSpan> {
+ vec![]
+ }
+ }
+
+ impl Error for SubBError {
+ fn source(&self) -> Option<&(dyn Error + 'static)> {
+ None
+ }
+ }
+
+ impl Display for SubBError {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+ write!(f, "SubBError")
+ }
+ }
+
+ impl Diagnostic for SubBError {
+ fn describe(&self) -> Vec<AnnotatedSpan> {
+ vec![]
+ }
+ }
+
+ impl From<SubAError> for SupError {
+ fn from(sub: SubAError) -> Self {
+ Self::SubA(sub)
+ }
+ }
+
+ impl From<SubBError> for SupError {
+ fn from(sub: SubBError) -> Self {
+ Self::SubB(sub)
+ }
+ }
+
+ #[allow(dead_code)] // Used only for type checking atm.
+ #[derive(Debug, PartialEq, Eq)]
+ enum SupToken {
+ ToA,
+ ToB,
+ }
+
+ impl Token for SupToken {
+ fn ir_name() -> &'static str {
+ "SupTest"
+ }
+
+ fn span(&self) -> Span {
+ S1
+ }
+ }
+
+ impl Display for SupToken {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+ write!(f, "SupToken")
+ }
+ }
+
+ #[derive(Debug, PartialEq, Eq)]
+ enum SupObject {
+ FromA(SupToken),
+ FromB(SupToken),
+ }
+
+ impl Object for SupObject {}
+
+ impl ParseState for Sup {
+ type Token = SupToken;
+ type Object = SupObject;
+ type Error = SupError;
+
+ fn parse_token(
+ self,
+ tok: Self::Token,
+ ctx: &mut Self::Context,
+ ) -> TransitionResult<Self> {
+ match self {
+ Self::SubA(st) => st.parse_token(tok, ctx),
+ Self::SubB(st) => st.parse_token(tok, ctx),
+ }
+ }
+
+ fn is_accepting(&self, _: &Self::Context) -> bool {
+ true
+ }
+ }
+
+ impl ParseState for SubA {
+ type Token = SupToken;
+ type Object = SupObject;
+ type Error = SubAError;
+ type Super = Sup;
+
+ fn parse_token(
+ self,
+ tok: Self::Token,
+ _ctx: &mut Self::Context,
+ ) -> TransitionResult<Self::Super> {
+ match tok {
+ SupToken::ToA => Transition(self).ok(SupObject::FromA(tok)),
+ SupToken::ToB => {
+ Transition(SubB::B).ok(SupObject::FromA(tok))
+ }
+ }
+ }
+
+ fn is_accepting(&self, _: &Self::Context) -> bool {
+ true
+ }
+ }
+
+ impl ParseState for SubB {
+ type Token = SupToken;
+ type Object = SupObject;
+ type Error = SubBError;
+ type Super = Sup;
+
+ fn parse_token(
+ self,
+ tok: Self::Token,
+ _ctx: &mut Self::Context,
+ ) -> TransitionResult<Self::Super> {
+ match tok {
+ SupToken::ToA => Transition(self).ok(SupObject::FromB(tok)),
+ SupToken::ToB => {
+ Transition(SubA::A).ok(SupObject::FromB(tok))
+ }
+ }
+ }
+
+ fn is_accepting(&self, _: &Self::Context) -> bool {
+ true
+ }
+ }
+ }
}
diff --git a/tamer/src/parse/error.rs b/tamer/src/parse/error.rs
index 5eb458f..a13be65 100644
--- a/tamer/src/parse/error.rs
+++ b/tamer/src/parse/error.rs
@@ -22,6 +22,7 @@
use super::Token;
use crate::{
diagnose::{Annotate, AnnotatedSpan, Diagnostic},
+ fmt::{DisplayWrapper, TtQuote},
span::Span,
};
use std::{error::Error, fmt::Display};
@@ -42,27 +43,6 @@ use super::{ParseState, ParseStatus, Parser};
/// [`StateError`][ParseError::StateError] variant.
#[derive(Debug, PartialEq)]
pub enum ParseError<T: Token, E: Diagnostic + PartialEq> {
- /// Token stream ended unexpectedly.
- ///
- /// This error means that the parser was expecting more input before
- /// reaching an accepting state.
- /// This could represent a truncated file,
- /// a malformed stream,
- /// or maybe just a user that's not done typing yet
- /// (e.g. in the case of an LSP implementation).
- ///
- /// If no span is available,
- /// then parsing has not even had the chance to begin.
- /// If this parser follows another,
- /// then the combinator ought to substitute a missing span with
- /// whatever span preceded this invocation.
- ///
- /// The string is intended to describe what was expected to have been
- /// available based on the current [`ParseState`].
- /// It is a heap-allocated string so that a copy of [`ParseState`]
- /// needn't be stored.
- UnexpectedEof(Span, String),
-
/// The parser reached an unhandled dead state.
///
/// For more information,
@@ -74,23 +54,15 @@ pub enum ParseError<T: Token, E: Diagnostic + PartialEq> {
/// needn't be stored.
UnexpectedToken(T, String),
- /// The parser contains an outstanding token of lookahead that is no
- /// longer
- /// (or possibly never was)
- /// part of the token stream,
- /// and would therefore be lost if the parser is finalized.
- ///
- /// The parser must consume the next token,
- /// which will be the token of lookahead,
- /// after which it may finalize provided that it is in an accepting
- /// state.
- ///
- /// See [`Parser::take_lookahead_tok`] for more information.
- Lookahead(Span, String),
-
/// A parser-specific error associated with an inner
/// [`ParseState`].
StateError(E),
+
+ /// The parser has no more input,
+ /// but it failed to automatically finalize.
+ ///
+ /// See [`Parser::finalize`] for more information.
+ FinalizeError(FinalizeError),
}
impl<T: Token, EA: Diagnostic + PartialEq> ParseError<T, EA> {
@@ -102,29 +74,109 @@ impl<T: Token, EA: Diagnostic + PartialEq> ParseError<T, EA> {
{
use ParseError::*;
match self {
- UnexpectedEof(span, desc) => UnexpectedEof(span, desc),
UnexpectedToken(x, desc) => UnexpectedToken(x, desc),
- Lookahead(span, desc) => Lookahead(span, desc),
StateError(e) => StateError(e.into()),
+ FinalizeError(e) => FinalizeError(e),
}
}
}
-impl<T: Token, E: Diagnostic + PartialEq> From<E> for ParseError<T, E> {
- fn from(e: E) -> Self {
- Self::StateError(e)
+//impl<T: Token, E: Diagnostic + PartialEq> From<E> for ParseError<T, E> {
+// fn from(e: E) -> Self {
+// Self::StateError(e)
+// }
+//}
+
+impl<T: Token, E: Diagnostic + PartialEq> From<FinalizeError>
+ for ParseError<T, E>
+{
+ fn from(e: FinalizeError) -> Self {
+ Self::FinalizeError(e)
}
}
impl<T: Token, E: Diagnostic + PartialEq> Display for ParseError<T, E> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
+ Self::UnexpectedToken(tok, desc) => {
+ write!(f, "unexpected {} while {desc}", TtQuote::wrap(tok))
+ }
+ Self::StateError(e) => Display::fmt(e, f),
+ Self::FinalizeError(e) => Display::fmt(e, f),
+ }
+ }
+}
+
+impl<T: Token, E: Diagnostic + PartialEq + 'static> Error for ParseError<T, E> {
+ fn source(&self) -> Option<&(dyn Error + 'static)> {
+ match self {
+ Self::UnexpectedToken(_, _) => None,
+ Self::StateError(e) => Some(e),
+ Self::FinalizeError(e) => Some(e),
+ }
+ }
+}
+
+impl<T: Token, E: Diagnostic + PartialEq + 'static> Diagnostic
+ for ParseError<T, E>
+{
+ fn describe(&self) -> Vec<AnnotatedSpan> {
+ use ParseError::*;
+
+ match self {
+ UnexpectedToken(tok, desc) => tok.span().error(desc).into(),
+ // TODO: Is there any additional useful context we can augment
+ // this with?
+ StateError(e) => e.describe(),
+ FinalizeError(e) => e.describe(),
+ }
+ }
+}
+
+#[derive(Debug, PartialEq)]
+pub enum FinalizeError {
+ /// Token stream ended unexpectedly.
+ ///
+ /// This error means that the parser was expecting more input before
+ /// reaching an accepting state.
+ /// This could represent a truncated file,
+ /// a malformed stream,
+ /// or maybe just a user that's not done typing yet
+ /// (e.g. in the case of an LSP implementation).
+ ///
+ /// If no span is available,
+ /// then parsing has not even had the chance to begin.
+ /// If this parser follows another,
+ /// then the combinator ought to substitute a missing span with
+ /// whatever span preceded this invocation.
+ ///
+ /// The string is intended to describe what was expected to have been
+ /// available based on the current [`ParseState`].
+ /// It is a heap-allocated string so that a copy of [`ParseState`]
+ /// needn't be stored.
+ UnexpectedEof(Span, String),
+
+ /// The parser contains an outstanding token of lookahead that is no
+ /// longer
+ /// (or possibly never was)
+ /// part of the token stream,
+ /// and would therefore be lost if the parser is finalized.
+ ///
+ /// The parser must consume the next token,
+ /// which will be the token of lookahead,
+ /// after which it may finalize provided that it is in an accepting
+ /// state.
+ ///
+ /// See [`Parser::take_lookahead_tok`] for more information.
+ Lookahead(Span, String),
+}
+
+impl Display for FinalizeError {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+ match self {
Self::UnexpectedEof(_, desc) => {
write!(f, "unexpected end of input while {desc}")
}
- Self::UnexpectedToken(_, desc) => {
- write!(f, "unexpected input while {desc}")
- }
// This is not really something the user should have to deal
// with,
// but maybe this will provide enough information that the
@@ -139,33 +191,19 @@ impl<T: Token, E: Diagnostic + PartialEq> Display for ParseError<T, E> {
outstanding token of lookahead while {desc}"
)
}
- Self::StateError(e) => Display::fmt(e, f),
}
}
}
-impl<T: Token, E: Diagnostic + PartialEq + 'static> Error for ParseError<T, E> {
- fn source(&self) -> Option<&(dyn Error + 'static)> {
- match self {
- Self::StateError(e) => Some(e),
- _ => None,
- }
- }
-}
+impl Error for FinalizeError {}
-impl<T: Token, E: Diagnostic + PartialEq + 'static> Diagnostic
- for ParseError<T, E>
-{
+impl Diagnostic for FinalizeError {
fn describe(&self) -> Vec<AnnotatedSpan> {
- use ParseError::*;
+ use FinalizeError::*;
match self {
UnexpectedEof(span, desc) => span.error(desc).into(),
- UnexpectedToken(tok, desc) => tok.span().error(desc).into(),
Lookahead(span, desc) => span.error(desc).into(),
- // TODO: Is there any additional useful context we can augment
- // this with?
- StateError(e) => e.describe(),
}
}
}
diff --git a/tamer/src/parse/lower.rs b/tamer/src/parse/lower.rs
index dd6efe2..15bd2f0 100644
--- a/tamer/src/parse/lower.rs
+++ b/tamer/src/parse/lower.rs
@@ -20,52 +20,50 @@
//! IR lowering operation between [`Parser`]s.
use super::{
- NoContext, Object, ParseError, ParseState, Parsed, ParsedResult, Parser,
- Token, TransitionResult, UnknownToken,
-};
-use crate::{
- diagnose::Diagnostic,
- iter::{TripIter, TrippableIterator},
+ state::ClosedParseState, FinalizeError, FinalizedParser, NoContext, Object,
+ ParseError, ParseState, Parsed, Parser, Token, TransitionResult,
+ UnknownToken,
};
+use crate::diagnose::Diagnostic;
use std::{fmt::Display, iter, marker::PhantomData};
#[cfg(doc)]
use super::TokenStream;
/// An IR lowering operation that pipes the output of one [`Parser`] to the
-/// input of another.
+/// input of another while propagating errors via a common
+/// [`WidenedError`] type `E`.
///
-/// This is produced by [`Lower`].
-pub struct LowerIter<'a, 'b, S, I, LS>
+/// This is produced by [`Lower`] methods.
+pub struct LowerIter<'a, S, I, LS, E>
where
S: ParseState,
- I: Iterator<Item = ParsedResult<S>>,
- LS: ParseState<Token = S::Object>,
+ I: Iterator<Item = WidenedParsedResult<S, E>>,
+ LS: ClosedParseState<Token = S::Object>,
<S as ParseState>::Object: Token,
+ E: WidenedError<S, LS>,
{
/// A push [`Parser`].
lower: Parser<LS, iter::Empty<LS::Token>>,
- /// Source tokens from higher-level [`Parser`],
- /// with the outer [`Result`] having been stripped by a [`TripIter`].
- toks: &'a mut TripIter<
- 'b,
- I,
- Parsed<S::Object>,
- ParseError<S::Token, S::Error>,
- >,
+ /// Source tokens from higher-level [`Parser`].
+ toks: &'a mut I,
+
+ /// `S` is used for its associated types only.
+ _phantom: PhantomData<S>,
}
-impl<'a, 'b, S, I, LS> LowerIter<'a, 'b, S, I, LS>
+impl<'a, S, I, LS, E> LowerIter<'a, S, I, LS, E>
where
S: ParseState,
- I: Iterator<Item = ParsedResult<S>>,
- LS: ParseState<Token = S::Object>,
+ I: Iterator<Item = WidenedParsedResult<S, E>>,
+ LS: ClosedParseState<Token = S::Object>,
<S as ParseState>::Object: Token,
+ E: WidenedError<S, LS>,
{
/// Consume inner parser and yield its context.
#[inline]
- fn finalize(self) -> Result<LS::Context, ParseError<LS::Token, LS::Error>> {
+ fn finalize(self) -> Result<FinalizedParser<LS>, FinalizeError> {
self.lower.finalize().map_err(|(_, e)| e)
}
}
@@ -74,11 +72,16 @@ where
///
/// Lowering is intended to be used between standalone [`ParseState`]s that
/// implement [`Default`].
-pub trait Lower<S, LS>
+///
+/// It is expected that input tokens have already been widened into `E`
+/// (a [`WidenedError`]) by a previous lowering operation,
+/// or by an introduction parser.
+pub trait Lower<S, LS, EW>
where
S: ParseState,
- LS: ParseState<Token = S::Object> + Default,
+ LS: ClosedParseState<Token = S::Object> + Default,
<S as ParseState>::Object: Token,
+ EW: WidenedError<S, LS>,
{
/// Lower the IR produced by this [`Parser`] into another IR by piping
/// the output to a new parser defined by the [`ParseState`] `LS`.
@@ -101,36 +104,34 @@ where
///
/// The new iterator is a [`LowerIter`],
/// and scoped to the provided closure `f`.
- /// The outer [`Result`] of `Self`'s [`ParsedResult`] is stripped by
- /// a [`TripIter`] before being provided as input to a new push
- /// [`Parser`] utilizing `LS`.
/// A push parser,
/// rather than pulling tokens from a [`TokenStream`],
/// has tokens pushed into it;
/// this parser is created automatically for you.
///
- /// _TODO_: There's no way to access the inner parser for error recovery
- /// after tripping the [`TripIter`].
- /// Consequently,
- /// this API (likely the return type) will change.
+ /// All errors from the parser `LS` are widened to the error type `E`,
+ /// which is expected to be an aggregate error type
+ /// (such as a sum type)
+ /// shared by the already-widened `S`-derived input.
+ /// Errors are propagated to the caller without lowering.
#[inline]
fn lower<U, E>(
&mut self,
- f: impl FnOnce(&mut LowerIter<S, Self, LS>) -> Result<U, E>,
+ f: impl FnOnce(&mut LowerIter<S, Self, LS, EW>) -> Result<U, E>,
) -> Result<U, E>
where
- Self: Iterator<Item = ParsedResult<S>> + Sized,
+ Self: Iterator<Item = WidenedParsedResult<S, EW>> + Sized,
<LS as ParseState>::Context: Default,
- ParseError<S::Token, S::Error>: Into<E>,
- ParseError<LS::Token, LS::Error>: Into<E>,
{
- self.while_ok(|toks| {
- // TODO: This parser is not accessible after error recovery!
- let lower = LS::parse(iter::empty());
- let mut iter = LowerIter { lower, toks };
- f(&mut iter)
- })
- .map_err(Into::into)
+ let lower = LS::parse(iter::empty());
+ let mut iter = LowerIter {
+ lower,
+ toks: self,
+ _phantom: PhantomData::default(),
+ };
+ f(&mut iter)
+
+ // TODO: Finalize!
}
/// Perform a lowering operation between two parsers where the context
@@ -140,63 +141,110 @@ where
///
/// See [`Lower::lower`] and [`ParseState::parse_with_context`] for more
/// information.
+ #[inline]
fn lower_with_context<U, E>(
&mut self,
ctx: LS::Context,
- f: impl FnOnce(&mut LowerIter<S, Self, LS>) -> Result<U, E>,
+ f: impl FnOnce(&mut LowerIter<S, Self, LS, EW>) -> Result<U, E>,
) -> Result<(U, LS::Context), E>
where
- Self: Iterator<Item = ParsedResult<S>> + Sized,
- ParseError<S::Token, S::Error>: Into<E>,
- ParseError<LS::Token, LS::Error>: Into<E>,
+ Self: Iterator<Item = WidenedParsedResult<S, EW>> + Sized,
+ E: Diagnostic + From<FinalizeError>,
{
- self.while_ok(|toks| {
- let lower = LS::parse_with_context(iter::empty(), ctx);
- let mut iter = LowerIter { lower, toks };
- let val = f(&mut iter)?;
-
- iter.finalize().map_err(Into::into).map(|ctx| (val, ctx))
- })
+ let lower = LS::parse_with_context(iter::empty(), ctx);
+ let mut iter = LowerIter {
+ lower,
+ toks: self,
+ _phantom: PhantomData::default(),
+ };
+ let val = f(&mut iter)?;
+
+ // TODO: Further propagate `FinalizedParser`
+ iter.finalize()
+ .map(FinalizedParser::into_context)
+ .map(|ctx| (val, ctx))
+ .map_err(E::from)
}
}
-impl<S, LS, I> Lower<S, LS> for I
+impl<S, LS, E, I> Lower<S, LS, E> for I
where
- I: Iterator<Item = ParsedResult<S>> + Sized,
+ I: Iterator<Item = WidenedParsedResult<S, E>> + Sized,
S: ParseState,
- LS: ParseState<Token = S::Object> + Default,
+ LS: ClosedParseState<Token = S::Object> + Default,
<S as ParseState>::Object: Token,
+ E: WidenedError<S, LS>,
{
}
-impl<'a, 'b, S, I, LS> Iterator for LowerIter<'a, 'b, S, I, LS>
+impl<'a, S, I, LS, E> Iterator for LowerIter<'a, S, I, LS, E>
where
S: ParseState,
- I: Iterator<Item = ParsedResult<S>>,
- LS: ParseState<Token = S::Object>,
+ I: Iterator<Item = WidenedParsedResult<S, E>>,
+ LS: ClosedParseState<Token = S::Object>,
<S as ParseState>::Object: Token,
+ E: WidenedError<S, LS>,
{
- type Item = ParsedResult<LS>;
+ type Item = WidenedParsedResult<LS, E>;
/// Pull a token through the higher-level [`Parser`],
/// push it to the lowering parser,
- /// and yield the resulting [`ParsedResult`].
+ /// and yield the lowered result.
+ ///
+ /// Errors from `LS` are widened into `E`.
#[inline]
fn next(&mut self) -> Option<Self::Item> {
let tok = self
.lower
.take_lookahead_tok()
.map(Parsed::Object)
+ .map(Ok)
.or_else(|| self.toks.next());
match tok {
+ // We are done when no tokens remain.
None => None,
- Some(Parsed::Incomplete) => Some(Ok(Parsed::Incomplete)),
- Some(Parsed::Object(obj)) => Some(self.lower.feed_tok(obj)),
+
+ // Errors have already been widened by the previous lowering
+ // operation.
+ Some(Err(e)) => Some(Err(e)),
+
+ // Incomplete parses are simply propagated,
+ // since we have no work to do.
+ Some(Ok(Parsed::Incomplete)) => Some(Ok(Parsed::Incomplete)),
+
+ // If a token was successfully parsed,
+ // then we can do our job and lower it.
+ // This utilizes the push parser `self.lower`.
+ Some(Ok(Parsed::Object(obj))) => {
+ Some(self.lower.feed_tok(obj).map_err(Into::into))
+ }
}
}
}
+/// A [`Diagnostic`] error type common to both `S` and `LS`.
+///
+/// This error type must be able to accommodate error variants from all
+/// associated lowering operations.
+/// The most obvious example of such an error type is an enum acting as a
+/// sum type,
+/// where the errors of each lowering operation are contained within
+/// separate variants.
+///
+/// This creates a common type that can be propagated through the lowering
+/// pipeline all the way to the calling terminal parser,
+/// which may then decide what to do
+/// (e.g. report errors and permit recovery,
+/// or terminate at the first sign of trouble).
+pub trait WidenedError<S: ParseState, LS: ParseState> = Diagnostic
+ + From<ParseError<<S as ParseState>::Token, <S as ParseState>::Error>>
+ + From<ParseError<<LS as ParseState>::Token, <LS as ParseState>::Error>>;
+
+/// A [`ParsedResult`](super::ParsedResult) with a [`WidenedError`].
+pub type WidenedParsedResult<S, E> =
+ Result<Parsed<<S as ParseState>::Object>, E>;
+
/// Representation of a [`ParseState`] producing some type of [`Object`].
///
/// This is intended to be used not as a value,
@@ -245,13 +293,15 @@ impl<O: Object, E: Diagnostic + PartialEq> ParseState for ParsedObject<O, E> {
unreachable!("ParsedObject must be used for type information only")
}
- fn is_accepting(&self) -> bool {
+ fn is_accepting(&self, _: &Self::Context) -> bool {
unreachable!("ParsedObject must be used for type information only")
}
}
// See `super::test` for more information on why there are so few tests
// here.
+// The robust types are quite effective at demanding coherency in spite of
+// complexity.
#[cfg(test)]
mod test {
use super::super::{
@@ -282,7 +332,7 @@ mod test {
Transition(self).ok(tok)
}
- fn is_accepting(&self) -> bool {
+ fn is_accepting(&self, _: &Self::Context) -> bool {
true
}
}
@@ -293,7 +343,7 @@ mod test {
let given = 27; // some value
let toks = vec![StubToken::YieldWithLookahead(given)];
- Lower::<StubEchoParseState, StubParseState>::lower(
+ Lower::<StubEchoParseState, StubParseState, _>::lower::<_, StubError>(
&mut StubEchoParseState::parse(toks.into_iter()),
|sut| {
// We have a single token,
@@ -321,9 +371,10 @@ mod test {
"expected end of both input stream and lookahead"
);
- Ok::<(), StubError>(())
+ Ok(Ok::<(), StubError>(()))
},
)
+ .unwrap()
.unwrap();
}
}
diff --git a/tamer/src/parse/parser.rs b/tamer/src/parse/parser.rs
index f093599..a4cacfe 100644
--- a/tamer/src/parse/parser.rs
+++ b/tamer/src/parse/parser.rs
@@ -20,8 +20,10 @@
//! High-level parsing abstraction.
use super::{
- ParseError, ParseResult, ParseState, ParseStatus, TokenStream, Transition,
- TransitionResult,
+ state::ClosedParseState,
+ trace::{self, ParserTrace},
+ FinalizeError, ParseError, ParseResult, ParseState, ParseStatus,
+ TokenStream, Transition, TransitionResult,
};
use crate::{
parse::state::{Lookahead, TransitionData},
@@ -76,7 +78,7 @@ impl<S: ParseState> From<ParseStatus<S>> for Parsed<S::Object> {
/// call [`finalize`](Parser::finalize) to ensure that parsing has
/// completed in an accepting state.
#[derive(Debug, PartialEq)]
-pub struct Parser<S: ParseState, I: TokenStream<S::Token>> {
+pub struct Parser<S: ClosedParseState, I: TokenStream<S::Token>> {
/// Input token stream to be parsed by the [`ParseState`] `S`.
toks: I,
@@ -127,9 +129,16 @@ pub struct Parser<S: ParseState, I: TokenStream<S::Token>> {
/// it was originally added for situations where Rust is unable to
/// elide the move of [`Parser::state`] in [`Parser::feed_tok`].
ctx: S::Context,
+
+ #[cfg(test)]
+ tracer: trace::HumanReadableTrace<"`cfg(test)`">,
+ #[cfg(all(not(test), feature = "parser-trace-stderr"))]
+ tracer: trace::HumanReadableTrace<"`cfg(parser-trace-stderr)`">,
+ #[cfg(not(any(test, feature = "parser-trace-stderr")))]
+ tracer: trace::VoidTrace,
}
-impl<S: ParseState, I: TokenStream<S::Token>> Parser<S, I> {
+impl<S: ClosedParseState, I: TokenStream<S::Token>> Parser<S, I> {
/// Create a parser with a pre-initialized [`ParseState`].
///
/// If the provided [`ParseState`] does not require context
@@ -146,6 +155,7 @@ impl<S: ParseState, I: TokenStream<S::Token>> Parser<S, I> {
state: Some(state),
last_span: UNKNOWN_SPAN,
ctx: Default::default(),
+ tracer: Default::default(),
}
}
@@ -158,7 +168,7 @@ impl<S: ParseState, I: TokenStream<S::Token>> Parser<S, I> {
/// since the parser will have no later opportunity to continue
/// parsing.
/// Consequently,
- /// the caller should expect [`ParseError::UnexpectedEof`] if the
+ /// the caller should expect [`FinalizeError::UnexpectedEof`] if the
/// parser is not in an accepting state.
///
/// To re-use the context returned by this method,
@@ -166,30 +176,28 @@ impl<S: ParseState, I: TokenStream<S::Token>> Parser<S, I> {
/// Note that whether the context is permitted to be reused,
/// or is useful independently to the caller,
/// is a decision made by the [`ParseState`].
- pub fn finalize(
- self,
- ) -> Result<S::Context, (Self, ParseError<S::Token, S::Error>)> {
+ pub fn finalize(self) -> Result<FinalizedParser<S>, (Self, FinalizeError)> {
match self.assert_accepting() {
- Ok(()) => Ok(self.ctx),
+ Ok(()) => Ok(FinalizedParser(self.ctx)),
Err(err) => Err((self, err)),
}
}
/// Return [`Ok`] if the parser both has no outstanding lookahead token
/// and is in an accepting state,
- /// otherwise [`Err`] with [`ParseError::UnexpectedEof`].
+ /// otherwise [`Err`] with [`FinalizeError::UnexpectedEof`].
///
/// See [`finalize`](Self::finalize) for the public-facing method.
- fn assert_accepting(&self) -> Result<(), ParseError<S::Token, S::Error>> {
+ fn assert_accepting(&self) -> Result<(), FinalizeError> {
let st = self.state.as_ref().unwrap();
if let Some(Lookahead(lookahead)) = &self.lookahead {
- Err(ParseError::Lookahead(lookahead.span(), st.to_string()))
- } else if st.is_accepting() {
+ Err(FinalizeError::Lookahead(lookahead.span(), st.to_string()))
+ } else if st.is_accepting(&self.ctx) {
Ok(())
} else {
let endpoints = self.last_span.endpoints();
- Err(ParseError::UnexpectedEof(
+ Err(FinalizeError::UnexpectedEof(
endpoints.1.unwrap_or(endpoints.0),
st.to_string(),
))
@@ -250,6 +258,12 @@ impl<S: ParseState, I: TokenStream<S::Token>> Parser<S, I> {
"lookahead token is available but was not consumed",
);
+ self.tracer.trace_tok_begin(
+ self.state.as_ref().unwrap(),
+ &tok,
+ &self.ctx,
+ );
+
// Parse a single token and perform the requested state transition.
//
// This is where the functional `ParseState` is married with a
@@ -265,6 +279,8 @@ impl<S: ParseState, I: TokenStream<S::Token>> Parser<S, I> {
// Note also that this is what Dead states require transitions.
let TransitionResult(Transition(state), data) =
self.state.take().unwrap().parse_token(tok, &mut self.ctx);
+
+ self.tracer.trace_tok_end(&state, &data, &self.ctx);
self.state.replace(state);
use ParseStatus::{Incomplete, Object};
@@ -299,7 +315,7 @@ impl<S: ParseState, I: TokenStream<S::Token>> Parser<S, I> {
match result {
Ok(parsed @ (Incomplete | Object(..))) => Ok(parsed.into()),
- Err(e) => Err(e.into()),
+ Err(e) => Err(ParseError::StateError(e)),
}
}
}
@@ -354,7 +370,7 @@ impl<S: ParseState, I: TokenStream<S::Token>> Parser<S, I> {
}
}
-impl<S: ParseState, I: TokenStream<S::Token>> Iterator for Parser<S, I> {
+impl<S: ClosedParseState, I: TokenStream<S::Token>> Iterator for Parser<S, I> {
type Item = ParsedResult<S>;
/// Parse a single [`Token`] according to the current
@@ -363,7 +379,7 @@ impl<S: ParseState, I: TokenStream<S::Token>> Iterator for Parser<S, I> {
///
/// If the underlying [`TokenStream`] yields [`None`],
/// then the [`ParseState`] must be in an accepting state;
- /// otherwise, [`ParseError::UnexpectedEof`] will occur.
+ /// otherwise, [`ParseError::FinalizeError`] will occur.
///
/// This is intended to be invoked by [`Iterator::next`].
/// Accepting a token rather than the [`TokenStream`] allows the caller
@@ -376,7 +392,7 @@ impl<S: ParseState, I: TokenStream<S::Token>> Iterator for Parser<S, I> {
match otok {
None => match self.assert_accepting() {
Ok(()) => None,
- Err(e) => Some(Err(e)),
+ Err(e) => Some(Err(e.into())),
},
Some(tok) => Some(self.feed_tok(tok)),
@@ -386,7 +402,7 @@ impl<S: ParseState, I: TokenStream<S::Token>> Iterator for Parser<S, I> {
impl<S, I> From<I> for Parser<S, I>
where
- S: ParseState + Default,
+ S: ClosedParseState + Default,
I: TokenStream<S::Token>,
<S as ParseState>::Context: Default,
{
@@ -408,13 +424,14 @@ where
state: Some(Default::default()),
last_span: UNKNOWN_SPAN,
ctx: Default::default(),
+ tracer: Default::default(),
}
}
}
impl<S, I, C> From<(I, C)> for Parser<S, I>
where
- S: ParseState<Context = C> + Default,
+ S: ClosedParseState<Context = C> + Default,
I: TokenStream<S::Token>,
{
/// Create a new parser with a provided context.
@@ -430,6 +447,27 @@ where
state: Some(Default::default()),
last_span: UNKNOWN_SPAN,
ctx,
+ tracer: Default::default(),
+ }
+ }
+}
+
+/// Residual state of a parser that has been finalized with
+/// [`Parser::finalize`].
+///
+/// This type can be used to ensure that parsers are always finalized at the
+/// end of an operation by providing such evidence to a caller.
+///
+/// If the inner [`ParseState::Context`] is empty or no longer needed,
+/// then this can be safely dropped without use.
+#[derive(Debug, PartialEq)]
+pub struct FinalizedParser<S: ParseState>(S::Context);
+
+impl<S: ParseState> FinalizedParser<S> {
+ /// Take ownership over the inner [`ParseState::Context`].
+ pub fn into_context(self) -> S::Context {
+ match self {
+ Self(ctx) => ctx,
}
}
}
@@ -440,7 +478,7 @@ pub mod test {
use crate::{
diagnose::Diagnostic,
parse::{Object, Token},
- span::DUMMY_SPAN,
+ span::dummy::DUMMY_SPAN,
};
use std::{assert_matches::assert_matches, error::Error, fmt::Display};
@@ -480,6 +518,10 @@ pub mod test {
}
impl Token for StubToken {
+ fn ir_name() -> &'static str {
+ "<PARSER TEST IR>"
+ }
+
fn span(&self) -> Span {
DUMMY_SPAN
}
@@ -488,8 +530,8 @@ pub mod test {
impl Object for StubToken {}
impl Display for StubToken {
- fn fmt(&self, _f: &mut std::fmt::Formatter) -> std::fmt::Result {
- unimplemented!()
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+ write!(f, "(test token)")
}
}
@@ -559,7 +601,7 @@ pub mod test {
}
}
- fn is_accepting(&self) -> bool {
+ fn is_accepting(&self, _: &Self::Context) -> bool {
true
}
}
@@ -644,7 +686,7 @@ pub mod test {
.finalize()
.expect_err("must not finalize with token of lookahead");
- assert_matches!(err, ParseError::Lookahead(span, _) if span == DUMMY_SPAN);
+ assert_matches!(err, FinalizeError::Lookahead(span, _) if span == DUMMY_SPAN);
}
// Tests the above,
@@ -670,7 +712,7 @@ pub mod test {
.finalize()
.expect_err("must not finalize with token of lookahead");
- assert_matches!(err, ParseError::Lookahead(span, _) if span == DUMMY_SPAN);
+ assert_matches!(err, FinalizeError::Lookahead(span, _) if span == DUMMY_SPAN);
// The token of lookahead should still be available to the parser,
// and this should consume it.
diff --git a/tamer/src/parse/state.rs b/tamer/src/parse/state.rs
index 30b1760..9892d78 100644
--- a/tamer/src/parse/state.rs
+++ b/tamer/src/parse/state.rs
@@ -47,12 +47,30 @@ pub enum ParseStatus<S: ParseState> {
Object(S::Object),
}
+impl<S: ParseState> ParseStatus<S> {
+ pub fn into_super(self) -> ParseStatus<S::Super> {
+ match self {
+ Self::Incomplete => ParseStatus::Incomplete,
+ Self::Object(obj) => ParseStatus::Object(obj),
+ }
+ }
+}
+
impl<S: ParseState<Object = T>, T: Object> From<T> for ParseStatus<S> {
fn from(obj: T) -> Self {
Self::Object(obj)
}
}
+/// A [`ParseState`] that transitions only to itself
+/// (is closed under transition).
+///
+/// These are the only [`ParseState`]s that can be used directly by
+/// [`Parser`],
+/// since [`Parser`] must be able to both handle every provided
+/// [`Transition`] and know how to delegate to inner [`ParseState`]s.
+pub trait ClosedParseState = ParseState<Super = Self>;
+
/// A parsing automaton.
///
/// These states are utilized by a [`Parser`].
@@ -74,7 +92,11 @@ impl<S: ParseState<Object = T>, T: Object> From<T> for ParseStatus<S> {
/// but is not necessarily true for smaller, specialized parsers intended
/// for use as components of a larger parser
/// (in a spirit similar to parser combinators).
-pub trait ParseState: PartialEq + Eq + Display + Debug + Sized {
+pub trait ParseState: PartialEq + Eq + Display + Debug + Sized
+where
+ Self: Into<Self::Super>,
+ Self::Error: Into<<Self::Super as ParseState>::Error>,
+{
/// Input tokens to the parser.
type Token: Token;
@@ -82,7 +104,32 @@ pub trait ParseState: PartialEq + Eq + Display + Debug + Sized {
type Object: Object;
/// Errors specific to this set of states.
- type Error: Debug + Diagnostic + PartialEq;
+ type Error: Debug
+ + Diagnostic
+ + PartialEq
+ + Into<<Self::Super as ParseState>::Error>;
+
+ /// Superstate (parent state).
+ ///
+ /// This is applicable only if the [`ParseState`] is capable of
+ /// transitioning to a state outside of its own.
+ /// It was initially introduced for implementing trampolines in place of
+ /// composition-based delegation,
+ /// the latter of which would otherwise require boxing on
+ /// (extremely) hot code paths for otherwise-recursive data
+ /// structures.
+ ///
+ /// Intuitively,
+ /// the superstate represents a sum type of the pool of all possible
+ /// [`ParseState`]s that we can request transfer of control to.
+ /// This is the same concept as [`StitchableParseState`],
+ /// but operating in reverse
+ /// (delegation via trampoline instead of direct function call).
+ type Super: ClosedParseState<
+ Token = Self::Token,
+ Object = Self::Object,
+ Context = Self::Context,
+ > = Self;
/// Object provided to parser alongside each token.
///
@@ -97,7 +144,7 @@ pub trait ParseState: PartialEq + Eq + Display + Debug + Sized {
/// the context and the types that are able to be inferred.
fn parse<I: TokenStream<Self::Token>>(toks: I) -> Parser<Self, I>
where
- Self: Default,
+ Self: ClosedParseState + Default,
Self::Context: Default,
{
Parser::from(toks)
@@ -123,7 +170,7 @@ pub trait ParseState: PartialEq + Eq + Display + Debug + Sized {
ctx: Self::Context,
) -> Parser<Self, I>
where
- Self: Default,
+ Self: ClosedParseState + Default,
{
Parser::from((toks, ctx))
}
@@ -168,14 +215,14 @@ pub trait ParseState: PartialEq + Eq + Display + Debug + Sized {
self,
tok: Self::Token,
ctx: &mut Self::Context,
- ) -> TransitionResult<Self>;
+ ) -> TransitionResult<Self::Super>;
/// Whether the current state represents an accepting state.
///
/// An accepting state represents a valid state to stop parsing.
/// If parsing stops at a state that is _not_ accepting,
/// then the [`TokenStream`] has ended unexpectedly and should produce
- /// a [`ParseError::UnexpectedEof`].
+ /// a [`ParseError::FinalizeError`].
///
/// It makes sense for there to be exist multiple accepting states for a
/// parser.
@@ -185,7 +232,7 @@ pub trait ParseState: PartialEq + Eq + Display + Debug + Sized {
/// or the entire list of attributes.
/// It is acceptable to attempt to parse just one of those attributes,
/// or it is acceptable to parse all the way until the end.
- fn is_accepting(&self) -> bool;
+ fn is_accepting(&self, ctx: &Self::Context) -> bool;
/// Delegate parsing from a compatible, stitched [`ParseState`] `SP`.
///
@@ -217,9 +264,9 @@ pub trait ParseState: PartialEq + Eq + Display + Debug + Sized {
self,
tok: <Self as ParseState>::Token,
mut context: C,
- into: impl FnOnce(Self) -> Transition<SP>,
+ into: impl FnOnce(<Self as ParseState>::Super) -> Transition<SP>,
dead: impl FnOnce() -> Transition<SP>,
- ) -> TransitionResult<SP>
+ ) -> TransitionResult<<SP as ParseState>::Super>
where
Self: StitchableParseState<SP>,
C: AsMut<<Self as ParseState>::Context>,
@@ -242,12 +289,15 @@ pub trait ParseState: PartialEq + Eq + Display + Debug + Sized {
dead().incomplete().with_lookahead(lookahead)
}
TransitionData::Result(result, lookahead) => TransitionResult(
- into(newst),
+ into(newst).into_super(),
TransitionData::Result(
match result {
Ok(Incomplete) => Ok(Incomplete),
Ok(Obj(obj)) => Ok(Obj(obj.into())),
- Err(e) => Err(e.into()),
+ // First convert the error into `SP::Error`,
+ // and then `SP::Super::Error`
+ // (which will be the same type if SP is closed).
+ Err(e) => Err(e.into().into()),
},
lookahead,
),
@@ -255,6 +305,41 @@ pub trait ParseState: PartialEq + Eq + Display + Debug + Sized {
}
}
+ /// Delegate parsing of a token from our superstate
+ /// [`ParseState::Super`].
+ ///
+ /// This operates just as [`ParseState::delegate`];
+ /// the API is simplified because [`TransitionResult`] already has
+ /// data mapped to the superstate.
+ /// `dead` indicates when the child (`self`) has finished parsing.
+ fn delegate_child<C>(
+ self,
+ tok: Self::Token,
+ mut context: C,
+ dead: impl FnOnce(
+ Self::Super,
+ Self::Token,
+ C,
+ ) -> TransitionResult<Self::Super>,
+ ) -> TransitionResult<Self::Super>
+ where
+ C: AsMut<<Self as ParseState>::Context>,
+ {
+ let TransitionResult(Transition(newst), data) =
+ self.parse_token(tok, context.as_mut());
+
+ match data {
+ TransitionData::Dead(Lookahead(lookahead)) => {
+ dead(newst, lookahead, context)
+ }
+
+ // Since this is child state,
+ // [`TransitionResult`] has already converted into the
+ // superstate for us.
+ _ => TransitionResult(Transition(newst), data),
+ }
+ }
+
/// Delegate parsing from a compatible, stitched [`ParseState`] `SP`
/// until this parser yields an [`Object`].
///
@@ -270,13 +355,13 @@ pub trait ParseState: PartialEq + Eq + Display + Debug + Sized {
self,
tok: <Self as ParseState>::Token,
mut context: C,
- into: impl FnOnce(Self) -> Transition<SP>,
- _dead: impl FnOnce() -> Transition<SP>,
+ into: impl FnOnce(<Self as ParseState>::Super) -> Transition<SP>,
+ dead: impl FnOnce() -> Transition<SP>,
objf: impl FnOnce(
- Self,
+ <Self as ParseState>::Super,
<Self as ParseState>::Object,
- ) -> TransitionResult<SP>,
- ) -> TransitionResult<SP>
+ ) -> TransitionResult<<SP as ParseState>::Super>,
+ ) -> TransitionResult<<SP as ParseState>::Super>
where
Self: PartiallyStitchableParseState<SP>,
C: AsMut<<Self as ParseState>::Context>,
@@ -287,9 +372,8 @@ pub trait ParseState: PartialEq + Eq + Display + Debug + Sized {
self.parse_token(tok, context.as_mut());
match data {
- TransitionData::Dead(Lookahead(_lookahead)) => {
- // Or restrict this to certain types of ParseState
- todo!("expecting object, so what should we do on Dead?")
+ TransitionData::Dead(Lookahead(lookahead)) => {
+ dead().incomplete().with_lookahead(lookahead)
}
TransitionData::Result(Ok(Obj(obj)), lookahead) => {
@@ -298,11 +382,14 @@ pub trait ParseState: PartialEq + Eq + Display + Debug + Sized {
}
TransitionData::Result(result, lookahead) => TransitionResult(
- into(newst),
+ into(newst).into_super(),
TransitionData::Result(
match result {
Ok(_) => Ok(Incomplete),
- Err(e) => Err(e.into()),
+ // First convert the error into `SP::Error`,
+ // and then `SP::Super::Error`
+ // (which will be the same type if SP is closed).
+ Err(e) => Err(e.into().into()),
},
lookahead,
),
@@ -315,7 +402,9 @@ pub trait ParseState: PartialEq + Eq + Display + Debug + Sized {
///
/// See [`ParseState::delegate`] for more information.
/// This method exists for a XIRT and ought to be removed when it is no
- /// longer needed.
+ /// longer needed;
+ /// as such,
+ /// it works only with [`ClosedParseState`].
fn delegate_with_obj<SP, C, X>(
self,
tok: <Self as ParseState>::Token,
@@ -330,6 +419,7 @@ pub trait ParseState: PartialEq + Eq + Display + Debug + Sized {
) -> TransitionResult<SP>
where
Self: PartiallyStitchableParseState<SP>,
+ SP: ClosedParseState,
C: AsMut<<Self as ParseState>::Context>,
{
use ParseStatus::{Incomplete, Object as Obj};
@@ -383,11 +473,19 @@ pub type ParseStateResult<S> = Result<ParseStatus<S>, <S as ParseState>::Error>;
/// it is not necessary for parser composition,
/// provided that you perform the necessary wiring yourself in absence
/// of state stitching.
+///
+/// A [`ParseState`] can only be stitched if it is capable of standing on
+/// its own with a [`Parser`],
+/// meaning it must be a [`ClosedParseState`].
+/// Otherwise,
+/// the parser must return a transition to [`ParseState::Super`],
+/// and delegation from [`ParseState::Super`] itself can be performed with
+/// [`ParseState::delegate_child`].
pub trait StitchableParseState<SP: ParseState> =
PartiallyStitchableParseState<SP>
where <Self as ParseState>::Object: Into<<SP as ParseState>::Object>;
-pub trait PartiallyStitchableParseState<SP: ParseState> = ParseState
+pub trait PartiallyStitchableParseState<SP: ParseState> = ClosedParseState
where
SP: ParseState<Token = <Self as ParseState>::Token>,
<Self as ParseState>::Error: Into<<SP as ParseState>::Error>;
@@ -478,6 +576,12 @@ pub mod context {
}
}
+ impl<T: Debug + Default> AsMut<Context<T>> for Context<T> {
+ fn as_mut(&mut self) -> &mut Context<T> {
+ self
+ }
+ }
+
impl<T: Debug + Default> Deref for Context<T> {
type Target = T;
diff --git a/tamer/src/parse/state/transition.rs b/tamer/src/parse/state/transition.rs
index 0d8d9b3..02eef5f 100644
--- a/tamer/src/parse/state/transition.rs
+++ b/tamer/src/parse/state/transition.rs
@@ -19,7 +19,9 @@
//! State transitions for parser automata.
-use super::{ParseState, ParseStateResult, ParseStatus, Token};
+use super::{
+ ClosedParseState, ParseState, ParseStateResult, ParseStatus, Token,
+};
use std::{
convert::Infallible,
hint::unreachable_unchecked,
@@ -46,6 +48,12 @@ use super::Parser;
/// for example,
/// for multiple data to be emitted in response to a single token.
///
+/// If a [`ParseState`] is not a [`ClosedParseState`],
+/// the transition will be to its superstate ([`ParseState::Super`]);
+/// this conversion is performed automatically by the [`Transition`]
+/// methods that produce [`TransitionResult`],
+/// (such as [`Transition::ok`]).
+///
/// This struct is opaque to ensure that critical invariants involving
/// transitions and lookahead are properly upheld;
/// callers must use the appropriate parsing APIs.
@@ -58,6 +66,14 @@ pub struct TransitionResult<S: ParseState>(
);
impl<S: ParseState> TransitionResult<S> {
+ pub fn into_super(self) -> TransitionResult<S::Super> {
+ match self {
+ Self(t, data) => {
+ TransitionResult(t.into_super(), data.into_super())
+ }
+ }
+ }
+
/// Indicate that this transition include a single token of lookahead,
/// which should be provided back to the parser in place of the
/// next token from the input stream.
@@ -146,6 +162,50 @@ pub(in super::super) enum TransitionData<S: ParseState> {
Dead(Lookahead<S::Token>),
}
+impl<S: ParseState> TransitionData<S> {
+ pub fn into_super(self) -> TransitionData<S::Super> {
+ match self {
+ Self::Result(st_result, ola) => TransitionData::Result(
+ st_result.map(ParseStatus::into_super).map_err(|e| e.into()),
+ ola,
+ ),
+ Self::Dead(la) => TransitionData::Dead(la),
+ }
+ }
+
+ /// Reference to the token of lookahead,
+ /// if any.
+ pub(in super::super) fn lookahead_ref(
+ &self,
+ ) -> Option<&Lookahead<S::Token>> {
+ match self {
+ TransitionData::Dead(ref la)
+ | TransitionData::Result(_, Some(ref la)) => Some(la),
+ _ => None,
+ }
+ }
+
+ /// Reference to parsed object,
+ /// if any.
+ pub(in super::super) fn object_ref(&self) -> Option<&S::Object> {
+ match self {
+ TransitionData::Result(Ok(ParseStatus::Object(obj)), _) => {
+ Some(obj)
+ }
+ _ => None,
+ }
+ }
+
+ /// Reference to parsing error,
+ /// if any.
+ pub(in super::super) fn err_ref(&self) -> Option<&S::Error> {
+ match self {
+ TransitionData::Result(Err(e), _) => Some(e),
+ _ => None,
+ }
+ }
+}
+
/// A verb denoting a state transition.
///
/// This is typically instantiated directly by a [`ParseState`] to perform a
@@ -161,38 +221,69 @@ pub(in super::super) enum TransitionData<S: ParseState> {
pub struct Transition<S: ParseState>(pub S);
impl<S: ParseState> Transition<S> {
+ /// Transform a [`Transition`] into a transition of its superstate
+ /// [`ParseState::Super`].
+ ///
+ /// This is needed because trait specialization does not yet have a path
+ /// to stabilization as of the time of writing,
+ /// and so `From<Transition<S>> for Transition<S::Super>` cannot be
+ /// implemented because those types overlap.
+ pub fn into_super(self) -> Transition<S::Super> {
+ match self {
+ Transition(st) => Transition(st.into()),
+ }
+ }
+
/// A state transition with corresponding data.
///
/// This allows [`ParseState::parse_token`] to emit a parsed object and
/// corresponds to [`ParseStatus::Object`].
- pub fn ok<T>(self, obj: T) -> TransitionResult<S>
+ pub fn ok<T>(self, obj: T) -> TransitionResult<S::Super>
where
- T: Into<ParseStatus<S>>,
+ T: Into<ParseStatus<S::Super>>,
{
- TransitionResult(self, TransitionData::Result(Ok(obj.into()), None))
+ TransitionResult(
+ self.into_super(),
+ TransitionData::Result(Ok(obj.into()), None),
+ )
}
/// A transition with corresponding error.
///
/// This indicates a parsing failure.
/// The state ought to be suitable for error recovery.
- pub fn err<E: Into<S::Error>>(self, err: E) -> TransitionResult<S> {
- TransitionResult(self, TransitionData::Result(Err(err.into()), None))
+ pub fn err<E: Into<S::Error>>(self, err: E) -> TransitionResult<S::Super> {
+ // The first error conversion is into that expected by S,
+ // which will _then_ (below) be converted into S::Super
+ // (if they're not the same).
+ let err_s: S::Error = err.into();
+
+ TransitionResult(
+ self.into_super(),
+ TransitionData::Result(Err(err_s.into()), None),
+ )
}
/// A state transition with corresponding [`Result`].
///
/// This translates the provided [`Result`] in a manner equivalent to
/// [`Transition::ok`] and [`Transition::err`].
- pub fn result<T, E>(self, result: Result<T, E>) -> TransitionResult<S>
+ pub fn result<T, E>(
+ self,
+ result: Result<T, E>,
+ ) -> TransitionResult<S::Super>
where
T: Into<ParseStatus<S>>,
E: Into<S::Error>,
{
TransitionResult(
- self,
+ self.into_super(),
TransitionData::Result(
- result.map(Into::into).map_err(Into::into),
+ result
+ .map(Into::into)
+ .map(ParseStatus::into_super)
+ .map_err(Into::<S::Error>::into)
+ .map_err(Into::into),
None,
),
)
@@ -202,9 +293,9 @@ impl<S: ParseState> Transition<S> {
/// object can be emitted.
///
/// This corresponds to [`ParseStatus::Incomplete`].
- pub fn incomplete(self) -> TransitionResult<S> {
+ pub fn incomplete(self) -> TransitionResult<S::Super> {
TransitionResult(
- self,
+ self.into_super(),
TransitionData::Result(Ok(ParseStatus::Incomplete), None),
)
}
@@ -223,12 +314,15 @@ impl<S: ParseState> Transition<S> {
/// object first,
/// use [`Transition::result`] or other methods along with a token
/// of [`Lookahead`].
- pub fn dead(self, tok: S::Token) -> TransitionResult<S> {
- TransitionResult(self, TransitionData::Dead(Lookahead(tok)))
+ pub fn dead(self, tok: S::Token) -> TransitionResult<S::Super> {
+ TransitionResult(
+ self.into_super(),
+ TransitionData::Dead(Lookahead(tok)),
+ )
}
}
-impl<S: ParseState> FromResidual<(Transition<S>, ParseStateResult<S>)>
+impl<S: ClosedParseState> FromResidual<(Transition<S>, ParseStateResult<S>)>
for TransitionResult<S>
{
fn from_residual(residual: (Transition<S>, ParseStateResult<S>)) -> Self {
@@ -285,7 +379,7 @@ pub trait Transitionable<S: ParseState> {
///
/// This may be necessary to satisfy ownership/borrowing rules when
/// state data from `S` is used to compute [`Self`].
- fn transition(self, to: S) -> TransitionResult<S>;
+ fn transition(self, to: S) -> TransitionResult<S::Super>;
}
impl<S, E> Transitionable<S> for Result<ParseStatus<S>, E>
@@ -293,7 +387,7 @@ where
S: ParseState,
<S as ParseState>::Error: From<E>,
{
- fn transition(self, to: S) -> TransitionResult<S> {
+ fn transition(self, to: S) -> TransitionResult<S::Super> {
Transition(to).result(self)
}
}
@@ -303,13 +397,13 @@ where
S: ParseState,
<S as ParseState>::Error: From<E>,
{
- fn transition(self, to: S) -> TransitionResult<S> {
+ fn transition(self, to: S) -> TransitionResult<S::Super> {
Transition(to).result(self.map(|_| ParseStatus::Incomplete))
}
}
impl<S: ParseState> Transitionable<S> for ParseStatus<S> {
- fn transition(self, to: S) -> TransitionResult<S> {
- Transition(to).ok(self)
+ fn transition(self, to: S) -> TransitionResult<S::Super> {
+ Transition(to).ok(self.into_super())
}
}
diff --git a/tamer/src/parse/trace.rs b/tamer/src/parse/trace.rs
new file mode 100644
index 0000000..38ea764
--- /dev/null
+++ b/tamer/src/parse/trace.rs
@@ -0,0 +1,173 @@
+// Parser tracing
+//
+// Copyright (C) 2014-2022 Ryan Specialty Group, LLC.
+//
+// This file is part of TAME.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+//! Tracing for parsing operations.
+//!
+//! This provides human-readable traces on standard error any time a token
+//! is fed to the parser.
+//! These traces are provided automatically when `cfg(test)`,
+//! which means that they are automatically included in the output of any
+//! test failure.
+//!
+//! Outside of tests,
+//! this can be enabled at configuration-time using the
+//! `parser-trace-stderr` feature flag
+//! (`./configure FEATURES=parser-trace-stderr`).
+//!
+//! _These traces are not meant to be machine-readable!_
+//! There may be other useful tracing formats in the future,
+//! including OpenTelemetry and the DOT graph description language.
+//! Do not try to use the human-readable traces in that way since the format
+//! is subject to change without notice.
+
+use super::{state::TransitionData, ParseState, Token};
+
+pub(super) trait ParserTrace: Default {
+ /// Output the upper portion of a token trace.
+ ///
+ /// This begins the trace with information about the current
+ /// [`ParseState`] and the token that was received.
+ /// Post-transition tracing is handled by [`Self::trace_tok_end`].
+ ///
+ /// There is no means to return an error and a failure to output the
+ /// trace should not interrupt processing.
+ fn trace_tok_begin<S: ParseState>(
+ &mut self,
+ st_orig: &S,
+ tok: &S::Token,
+ ctx: &S::Context,
+ );
+
+ /// Output the lower portion of a token trace.
+ ///
+ /// This ends the trace with information about the transition and the
+ /// resulting [`ParseState`].
+ ///
+ /// There is no means to return an error and a failure to output the
+ /// trace should not interrupt processing.
+ fn trace_tok_end<S: ParseState>(
+ &mut self,
+ st_new: &S,
+ data: &TransitionData<S>,
+ ctx: &S::Context,
+ );
+}
+
+/// Perform no tracing.
+///
+/// This should be used by default for non-test builds,
+/// since tracing can incur a significant performance cost.
+#[derive(Debug, PartialEq, Default)]
+pub struct VoidTrace;
+
+impl ParserTrace for VoidTrace {
+ fn trace_tok_begin<S: ParseState>(
+ &mut self,
+ _st_orig: &S,
+ _tok: &S::Token,
+ _ctx: &S::Context,
+ ) {
+ // Do nothing at all.
+ }
+
+ fn trace_tok_end<S: ParseState>(
+ &mut self,
+ _st_new: &S,
+ _data: &TransitionData<S>,
+ _ctx: &S::Context,
+ ) {
+ // Do nothing at all.
+ }
+}
+
+/// Human-readable [`ParserTrace`].
+///
+///
+/// Note: if one of these trace blocks does not fully output,
+/// then you may have a `Display::fmt` or `Debug::fmt` panic---like
+/// a `todo!` or `unimplemented!`---in
+/// your `Token` or `ParseState`.
+///
+/// See [module-level](super) documentation for more information.
+#[derive(Debug, PartialEq, Default)]
+pub struct HumanReadableTrace<const REASON: &'static str>;
+
+impl<const REASON: &'static str> ParserTrace for HumanReadableTrace<REASON> {
+ fn trace_tok_begin<S: ParseState>(
+ &mut self,
+ st_orig: &S,
+ tok: &S::Token,
+ ctx: &S::Context,
+ ) {
+ eprint!(
+ "\
+[Parser::feed_tok] (input IR: {ir})
+| ==> Parser before tok is {st_orig}.
+| | {st_orig:?}
+| | Context: {ctx:?}
+|
+| ==> {ir} tok: {tok}
+| | {tok:?}
+|\n",
+ ir = S::Token::ir_name()
+ );
+ }
+
+ fn trace_tok_end<S: ParseState>(
+ &mut self,
+ st_new: &S,
+ data: &TransitionData<S>,
+ ctx: &S::Context,
+ ) {
+ eprint!(
+ "\
+| ==> Parser after tok is {st_new}.
+| | {st_new:?}
+| | Lookahead: {la:?}
+| | Context: {ctx:?}\n",
+ la = data.lookahead_ref(),
+ );
+
+ if let Some(obj) = data.object_ref() {
+ // Note that `Object` does not implement `Display`,
+ // but you'll see a `Display` representation if the object
+ // is passed to another `Parser` as a `Token`.
+ eprint!(
+ "\
+|
+| ==> Yielded object:
+| | {obj:?}\n",
+ );
+ }
+
+ if let Some(err) = data.err_ref() {
+ eprint!(
+ "\
+|
+| ==> !!! error: {err}.
+| | {err:?}\n",
+ );
+ }
+
+ eprint!(
+ "= note: this trace was output as a debugging aid \
+ because {REASON}.\n\n",
+ );
+ }
+}
diff --git a/tamer/src/parse/util.rs b/tamer/src/parse/util.rs
new file mode 100644
index 0000000..1a51348
--- /dev/null
+++ b/tamer/src/parse/util.rs
@@ -0,0 +1,92 @@
+// TAMER parsing framework utilities
+//
+// Copyright (C) 2014-2022 Ryan Specialty Group, LLC.
+//
+// This file is part of TAME.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+//! Utilities that make parsing practical and convenient in different
+//! contexts.
+//!
+//! The utilities presented here do not introduce any new capabilities into
+//! the system;
+//! they provide wrappers around core functionality that make it easier
+//! to use outside of the domain of the parsing system itself.
+
+use crate::{span::Span, sym::SymbolId};
+
+use super::{Object, ParseState, Token};
+use std::fmt::Display;
+
+pub trait ExpandingParseState<T: Token, O: Object> =
+ ParseState<Token = T, Object = Expansion<T, O>>;
+
+/// Represents an expansion operation on some source token of type `T`.
+///
+/// See variants and [`ExpandingParseState`] for more information.
+#[derive(Debug, PartialEq, Eq)]
+pub enum Expansion<T, O: Object> {
+ /// A token of type `O` has been derived from the source token and
+ /// should be merged into the target token stream.
+ Expanded(O),
+
+ /// Expansion is complete and the source token should be replaced with
+ /// the inner `T`.
+ DoneExpanding(T),
+}
+
+impl<T: Token, O: Object> Object for Expansion<T, O> {}
+
+/// A [`SymbolId`] with a corresponding [`Span`].
+///
+/// This newtype is required because foreign traits
+/// (such as [`Display`])
+/// cannot be implemented on tuples at the time of writing.
+#[derive(Debug, PartialEq, Eq)]
+pub struct SPair(pub SymbolId, pub Span);
+
+impl Token for SPair {
+ fn ir_name() -> &'static str {
+ "Generic Symbol"
+ }
+
+ fn span(&self) -> Span {
+ match self {
+ Self(_, span) => *span,
+ }
+ }
+}
+
+impl Display for SPair {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+ match self {
+ Self(sym, _) => Display::fmt(sym, f),
+ }
+ }
+}
+
+impl From<(SymbolId, Span)> for SPair {
+ fn from((sym, span): (SymbolId, Span)) -> Self {
+ Self(sym, span)
+ }
+}
+
+impl Into<(SymbolId, Span)> for SPair {
+ fn into(self) -> (SymbolId, Span) {
+ match self {
+ Self(sym, span) => (sym, span),
+ }
+ }
+}
diff --git a/tamer/src/span.rs b/tamer/src/span.rs
index b947ddf..54cba05 100644
--- a/tamer/src/span.rs
+++ b/tamer/src/span.rs
@@ -184,7 +184,7 @@
//! [rustc-span]: https://doc.rust-lang.org/stable/nightly-rustc/rustc_span/struct.Span.html
use crate::{
- global,
+ debug_diagnostic_panic, global,
sym::{st16, ContextStaticSymbolId, GlobalSymbolResolve, SymbolId},
};
use std::{convert::TryInto, fmt::Display, path::Path};
@@ -395,6 +395,47 @@ impl Span {
)
}
+ /// Create a new span that is a slice of this one.
+ ///
+ /// If either `rel_offset` or `len` are too large,
+ /// then a copy of the span will be returned unsliced.
+ ///
+ /// Panics (Debug Mode)
+ /// -------------------
+ /// If the offset and length exceeds the bounds of the span,
+ /// then the system has an arithmetic bug that ought to be corrected,
+ /// and so this will panic with a diagnostic message.
+ /// This check does not occur on release builds since this is not a
+ /// safety issue and should be caught by tests.
+ pub fn slice(self, rel_offset: usize, len: usize) -> Self {
+ let (irel_offset, ilen) = match (rel_offset.try_into(), len.try_into())
+ {
+ (Ok(x), Ok(y)) => (x, y),
+ _ => (0, self.len()),
+ };
+
+ // We shouldn't ignore slices that exceed the length of the span,
+ // since this represents a bug that'll cause nonsense diagnostic
+ // data and it represents an arithmetic bug in the system
+ // (but there are no safety concerns).
+ if ((irel_offset as usize).saturating_add(ilen as usize))
+ > self.len() as usize
+ {
+ use crate::diagnose::Annotate;
+ debug_diagnostic_panic!(
+ self.error("attempting to slice this span").into(),
+ "length {len} at offset {rel_offset} \
+ exceeds bounds of span {self}",
+ );
+ }
+
+ Self {
+ ctx: self.ctx,
+ offset: self.offset.saturating_add(irel_offset),
+ len: ilen,
+ }
+ }
+
/// Adjust span such that its offset is relative to the provided span.
///
/// If the provide `rel_span` does not precede this span,
@@ -490,16 +531,6 @@ impl Display for Span {
/// known.
pub const UNKNOWN_SPAN: Span = Span::st_ctx(st16::CTX_UNKNOWN);
-/// A dummy span that can be used in contexts where a span is expected but
-/// is not important.
-///
-/// This is intended primarily for tests;
-/// you should always use an appropriate span to permit sensible error
-/// messages and source analysis.
-///
-/// Additional dummy spans can be derived from this one.
-pub const DUMMY_SPAN: Span = Span::st_ctx(st16::CTX_DUMMY);
-
/// Context for byte offsets (e.g. a source file).
///
/// A context is lifetime-free and [`Copy`]-able,
@@ -558,16 +589,6 @@ impl Context {
/// yet known.
pub const UNKNOWN_CONTEXT: Context = Context(st16::raw::CTX_UNKNOWN);
-/// A dummy context that can be used where a span is expected but is not
-/// important.
-///
-/// This is intended primarily for tests;
-/// you should always use an appropriate span to permit sensible error
-/// messages and source analysis.
-///
-/// See also [`UNKNOWN_CONTEXT`].
-pub const DUMMY_CONTEXT: Context = Context(st16::raw::CTX_DUMMY);
-
impl<P: Into<PathSymbolId>> From<P> for Context {
fn from(sym: P) -> Self {
Self(sym.into())
@@ -610,6 +631,52 @@ impl<T: Copy + PartialOrd> From<(T, T)> for ClosedByteInterval<T> {
assert_eq_size!(ClosedByteInterval, u64);
+/// Dummy spans for testing.
+#[cfg(test)]
+pub mod dummy {
+ use super::{st16, Context, Span};
+
+ /// A dummy span that can be used in contexts where a span is expected
+ /// but is not important.
+ ///
+ /// This is intended primarily for tests;
+ /// you should always use an appropriate span to permit sensible error
+ /// messages and source analysis.
+ /// For spans that are actually unknown,
+ /// use [`super::UNKNOWN_SPAN`].
+ ///
+ /// Additional dummy spans can be derived from this one.
+ pub const DUMMY_SPAN: Span = Span::st_ctx(st16::CTX_DUMMY);
+
+ /// A dummy context that can be used where a span is expected but is not
+ /// important.
+ ///
+ /// This is intended primarily for tests;
+ /// you should always use an appropriate span to permit sensible error
+ /// messages and source analysis.
+ /// For contexts that are actually unknown,
+ /// use [`super::UNKNOWN_CONTEXT`].
+ ///
+ /// See also [`super::UNKNOWN_CONTEXT`].
+ pub const DUMMY_CONTEXT: Context = Context(st16::raw::CTX_DUMMY);
+
+ // This name is for brevity;
+ // we don't want to expose it because we don't want anyone to assume
+ // that a different name means that it's somehow different from
+ // `DUMMY_SPAN`.
+ const S0: Span = DUMMY_SPAN;
+
+ pub const S1: Span = S0.offset_add(1).unwrap();
+ pub const S2: Span = S0.offset_add(2).unwrap();
+ pub const S3: Span = S0.offset_add(3).unwrap();
+ pub const S4: Span = S0.offset_add(4).unwrap();
+ pub const S5: Span = S0.offset_add(5).unwrap();
+ pub const S6: Span = S0.offset_add(6).unwrap();
+ pub const S7: Span = S0.offset_add(7).unwrap();
+ pub const S8: Span = S0.offset_add(8).unwrap();
+ pub const S9: Span = S0.offset_add(9).unwrap();
+}
+
#[cfg(test)]
mod test {
use super::*;
@@ -755,4 +822,25 @@ mod test {
assert_eq!(start, Span::new(offset, 0, ctx));
assert_eq!(end, Span::new(SpanOffsetSize::MAX, 0, ctx));
}
+
+ #[test]
+ fn span_slice_yields_slice_within_original() {
+ let ctx = Context::from("slice");
+ let span = ctx.span(10, 10);
+
+ assert_eq!(ctx.span(15, 5), span.slice(5, 5));
+ }
+
+ #[test]
+ fn span_slice_large_values_yield_original() {
+ let span = Context::from("slice").span(0, 50);
+
+ // Too large of an offset should return original even though length
+ // is okay.
+ assert_eq!(span, span.slice(usize::MAX, 5));
+
+ // Too large of length should return original even though offset is
+ // okay.
+ assert_eq!(span, span.slice(0, usize::MAX));
+ }
}
diff --git a/tamer/src/sym/prefill.rs b/tamer/src/sym/prefill.rs
index 39d0fd1..7fe6835 100644
--- a/tamer/src/sym/prefill.rs
+++ b/tamer/src/sym/prefill.rs
@@ -195,9 +195,9 @@ macro_rules! static_symbol_consts {
#[doc=concat!(
"Interned `",
stringify!($ty),
- "` string `\"",
- $str,
- "\"`."
+ "` ",
+ static_symbol_consts!(@!str $ty $str),
+ "."
)]
#[doc=""]
#[doc=concat!(
@@ -227,7 +227,16 @@ macro_rules! static_symbol_consts {
/// This can be used to help determine a base capacity for
/// collections holding [`SymbolId`]s.
pub const ST_COUNT: usize = $i - 1;
- }
+ };
+
+ // Whitespace with newlines causes rustdoc parsing issues.
+ (@!str ws $str:expr) => {
+ "whitespace"
+ };
+
+ (@!str $ty:ident $str:expr) => {
+ concat!("string `\"", $str, "\"`")
+ };
}
/// Statically allocate [`SymbolId`]s for the provided symbols,
@@ -267,9 +276,9 @@ macro_rules! static_symbols {
#[doc=concat!(
"Raw (untyped) interned `",
stringify!($ty),
- "` string `\"",
- $str,
- "\"`."
+ "` ",
+ static_symbols!(@!str $ty $str),
+ "."
)]
#[doc=""]
#[doc=concat!(
@@ -309,7 +318,16 @@ macro_rules! static_symbols {
interner
}
- }
+ };
+
+ // Whitespace with newlines causes rustdoc parsing issues.
+ (@!str ws $str:expr) => {
+ "whitespace"
+ };
+
+ (@!str $ty:ident $str:expr) => {
+ concat!("string `\"", $str, "\"`")
+ };
}
static_symbol_newtypes! {
@@ -354,6 +372,14 @@ static_symbol_newtypes! {
/// Any other generic string that does not fit into any particular type.
str: GenericStaticSymbolId<global::ProgSymSize>,
+ /// Common strings of whitespace
+ /// (where a character of whitespace is `[ \n]`).
+ ///
+ /// There are certainly other whitespace characters,
+ /// but this is intended to be conservative to address only the most
+ /// common cases.
+ ws: WhitespaceStaticSymbolId<global::ProgSymSize>,
+
/// Static 16-bit [`Span`](crate::span::Span) context.
///
/// These contexts are intended for use in generated code where a better
@@ -420,6 +446,87 @@ pub mod st {
}
}
+ /// Whether the provided symbol is part of the static symbol list that
+ /// is pre-interned.
+ #[inline]
+ pub fn is_pre_interned(sym: SymbolId) -> bool {
+ let symid = sym.as_usize();
+ symid <= END_STATIC.as_usize()
+ }
+
+ /// Whether the given [`SymbolId`] is within a group of symbols
+ /// delimited by markers `a` and `b`.
+ ///
+ /// This provides a _reasonably_ efficient way to compare a [`SymbolId`]
+ /// against a large set of [`SymbolId`]s.
+ /// There are more efficient ways to accomplish this,
+ /// though,
+ /// if performance ever does become a concern;
+ /// the current implementation is kept simple until then.
+ #[inline]
+ pub fn is_between_markers(
+ a: MarkStaticSymbolId,
+ b: MarkStaticSymbolId,
+ sym: SymbolId,
+ ) -> bool {
+ let symid = sym.as_usize();
+ symid > a.as_usize() && symid < b.as_usize()
+ }
+
+ /// Whether the provided [`SymbolId`] is recognized as a common
+ /// whitespace symbol in the preinterned symbol list.
+ ///
+ /// If this returns `true`,
+ /// then this is a quick way to determine that the provided
+ /// [`SymbolId`] does contain only whitespace.
+ /// However,
+ /// this is _not_ comprehensive and never will be,
+ /// so an answer of `false` means "it may or may not be whitespace";
+ /// you should fall back to other methods of checking for
+ /// whitespace if this fails.
+ #[inline]
+ pub fn is_common_whitespace(sym: SymbolId) -> bool {
+ is_between_markers(WS_SYM_START, WS_SYM_END, sym)
+ }
+
+ /// Attempt to make a quick determination without a memory lookup
+ /// (symbol resolution) whether the given [`SymbolId`]'s string
+ /// representation definitely contains the given byte value.
+ ///
+ /// A value of [`None`] means "maybe, maybe not",
+ /// indicating that the caller ought to fall back to a slower check
+ /// that utilizes the symbol's resolved string.
+ /// A value of [`Some`] indicates that `sym`,
+ /// were it to be resolved,
+ /// definitely does or does not contain the byte `ch`.
+ ///
+ /// This is intended to encapsulate special,
+ /// loosely-defined cases where we can test that the interned symbols
+ /// actually properly adhere to the implementation of this function.
+ #[inline]
+ pub fn quick_contains_byte(sym: SymbolId, ch: u8) -> Option<bool> {
+ match (is_pre_interned(sym), ch) {
+ // No control characters or null bytes.
+ (true, 0..=0x1F) => Some(false),
+
+ // No characters outside the 7-bit ASCII range.
+ (true, 0x80..) => Some(false),
+
+ // Or the character range immediately preceding it,
+ // where 7F == DEL.
+ // They are explicitly listed here so that readers do not have
+ // to consult an ASCII table to avoid unintentional bugs.
+ (true, b'{' | b'|' | b'}' | b'~' | 0x7F) => Some(false),
+
+ // We don't check for anything else (yet).
+ (true, _) => None,
+
+ // We cannot possibly know statically whether dynamically
+ // interned symbols contain any particular byte.
+ (false, _) => None,
+ }
+ }
+
static_symbols! {
<crate::global::ProgSymSize>;
@@ -436,86 +543,248 @@ pub mod st {
N8: dec "8",
N9: dec "9",
+ L_ALL: cid "all",
+ L_ANY: cid "any",
+ L_APPLY: cid "apply",
+ L_APPLY_TEMPLATE: tid "apply-template",
+ L_ARG: cid "arg",
+ L_AS: cid "as",
+ L_BASE_TYPE: tid "base-type",
L_BOOLEAN: cid "boolean",
+ L_C: cid "c",
+ L_CAR: cid "car",
+ L_CASE: cid "case",
+ L_CASES: cid "cases",
+ L_CDR: cid "cdr",
+ L_CEIL: cid "ceil",
L_CGEN: cid "cgen",
L_CLASS: cid "class",
L_CLASSIFY: cid "classify",
+ L_CONS: cid "cons",
L_CONST: cid "const",
+ L_CORE: cid "core",
+ L_DASH: cid "dash",
+ L_DEFAULT: cid "default",
L_DEP: cid "dep",
L_DESC: cid "desc",
L_DIM: cid "dim",
+ L_DISPLAY: cid "display",
+ L_DOT: cid "dot",
L_DTYPE: cid "dtype",
+ L_DYN_NODE: tid "dyn-node",
L_ELIG_CLASS_YIELDS: tid "elig-class-yields",
L_EMPTY: cid "empty",
+ L_ENUM: cid "enum",
+ L_EQ: cid "eq",
+ L_ERROR: cid "error",
L_EXEC: cid "exec",
+ L_EXPAND_BARRIER: tid "expand-barrier",
+ L_EXPAND_FUNCTION: tid "expand-function",
+ L_EXPAND_GROUP: tid "expand-group",
+ L_EXPAND_SEQUENCE: tid "expand-sequence",
L_EXPORT: cid "export",
+ L_EXPT: cid "expt",
L_EXTERN: cid "extern",
L_FALSE: cid "false",
L_FLOAT: cid "float",
+ L_FLOOR: cid "floor",
+ L_FOR_EACH: tid "for-each",
L_FRAGMENT: cid "fragment",
L_FRAGMENTS: cid "fragments",
L_FROM: cid "from",
L_FUNC: cid "func",
+ L_FUNCTION: cid "function",
L_GEN: cid "gen",
L_GENERATED: cid "generated",
+ L_GENERATES: cid "generates",
+ L_GENSYM: cid "gensym",
+ L_GENTLE_NO: tid "gentle-no",
+ L_GT: cid "gt",
+ L_GTE: cid "gte",
L_ID: cid "id",
+ L_IDENTIFIER: cid "identifier",
+ L_IF: cid "if",
+ L_IGNORE_MISSING: tid "ignore-missing",
+ L_IMPORT: cid "import",
+ L_INDEX: cid "index",
+ L_INLINE_TEMPLATE: tid "inline-template",
L_INTEGER: cid "integer",
L_ISOVERRIDE: cid "isoverride",
+ L_ITEM: cid "item",
+ L_KEY: cid "key",
L_L: cid "l",
+ L_LABEL: cid "label",
+ L_LENGTH_OF: tid "length-of",
+ L_LET: cid "let",
+ L_LOCAL: cid "local",
+ L_LOWER: cid "lower",
L_LPARAM: cid "lparam",
+ L_LT: cid "lt",
+ L_LTE: cid "lte",
L_LV: cid "lv",
L_MAP: cid "map",
L_MAP_EXEC: tid "map-exec",
L_MAP_FROM: tid "map-from",
L_MAP_HEAD: qname "map:head",
L_MAP_TAIL: qname "map:tail",
+ L_MATCH: cid "match",
L_META: cid "meta",
+ L_METHOD: cid "method",
L_NAME: cid "name",
+ L_NAME_PREFIX: tid "name-prefix",
+ L_NE: cid "ne",
+ L_NO: cid "no",
+ L_NOVALIDATE: cid "novalidate",
+ L_OF: cid "of",
+ L_ON: cid "on",
+ L_OTHERWISE: cid "otherwise",
+ L_OVERRIDE: cid "override",
L_PACKAGE: cid "package",
L_PARAM: cid "param",
+ L_PARAM_ADD: tid "param-add",
+ L_PARAM_CLASS_TO_YIELDS: tid "param-class-to-yields",
+ L_PARAM_COPY: tid "param-copy",
+ L_PARAM_INHERIT: tid "param-inherit",
+ L_PARAM_META: tid "param-meta",
+ L_PARAM_SYM_VALUE: tid "param-sym-value",
+ L_PARAM_TYPEDEF_LOOKUP: tid "param-typedef-lookup",
+ L_PARAM_VALUE: tid "param-value",
L_PARENT: cid "parent",
+ L_PASS: cid "pass",
+ L_PATH: cid "path",
+ L_PREFIX: cid "prefix",
L_PREPROC: cid "preproc",
+ L_PRODUCT: cid "product",
L_PROGRAM: cid "program",
+ L_PROGRAM_MAP: tid "program-map",
+ L_QUOTIENT: cid "quotient",
L_RATE: cid "rate",
+ L_RATER: cid "rater",
+ L_RATE_EACH: cid "rate-each",
+ L_RECURSE: cid "recurse",
L_RETMAP: cid "retmap",
L_RETMAP_EXEC: tid "retmap-exec",
L_RETMAP_HEAD: qname "retmap:head",
L_RETMAP_TAIL: qname "retmap:tail",
+ L_RETURN_MAP: tid "return-map",
+ L_RMDASH: cid "rmdash",
+ L_RMUNDERSCORE: cid "rmunderscore",
+ L_SCALAR: cid "scalar",
+ L_SECTION: cid "section",
+ L_SET: cid "set",
+ L_SNAKE: cid "snake",
L_SRC: cid "src",
L_STATIC: cid "static",
+ L_SUFFIX: cid "suffix",
+ L_SUM: cid "sum",
L_SYM: cid "sym",
L_SYMTABLE: cid "symtable",
L_SYM_DEP: cid "sym-dep",
L_SYM_DEPS: cid "sym-deps",
L_SYM_REF: cid "sym-ref",
+ L_SYM_SET: tid "sym-set",
+ L_T: cid "t",
+ L_TEMPLATE: cid "template",
+ L_TERMINATE: cid "terminate",
+ L_TEXT: cid "text",
L_TITLE: cid "title",
+ L_TO: cid "to",
L_TPL: cid "tpl",
+ L_TRANSFORM: cid "transform",
+ L_TRANSLATE: cid "translate",
L_TRUE: cid "true",
L_TYPE: cid "type",
+ L_TYPEDEF: cid "typedef",
+ L_UCFIRST: cid "ucfirst",
+ L_UNION: cid "union",
+ L_UNIQUE: cid "unique",
+ L_UNLESS: cid "unless",
+ L_UPPER: cid "upper",
L_UUROOTPATH: cid "__rootpath",
L_VALUE: cid "value",
+ L_VALUES: cid "values",
+ L_VALUE_OF: cid "value-of",
+ L_VECTOR: cid "vector",
L_VIRTUAL: cid "virtual",
+ L_WARNING: cid "warning",
+ L_WHEN: cid "when",
L_WORKSHEET: cid "worksheet",
L_XMLNS: cid "xmlns",
+ L_YIELD: cid "yield",
L_YIELDS: cid "yields",
+ CC_ANY_OF: cid "anyOf",
+
L_MAP_UUUHEAD: str ":map:___head",
L_MAP_UUUTAIL: str ":map:___tail",
L_RETMAP_UUUHEAD: str ":retmap:___head",
L_RETMAP_UUUTAIL: str ":retmap:___tail",
- URI_LV_RATER: uri "http://www.lovullo.com/rater",
- URI_LV_PREPROC: uri "http://www.lovullo.com/rater/preproc",
+ URI_LV_CALC: uri "http://www.lovullo.com/calc",
URI_LV_LINKER: uri "http://www.lovullo.com/rater/linker",
+ URI_LV_PREPROC: uri "http://www.lovullo.com/rater/preproc",
+ URI_LV_PROGRAM_MAP: uri "http://www.lovullo.com/rater/map",
+ URI_LV_RATER: uri "http://www.lovullo.com/rater",
+ URI_LV_TPL: uri "http://www.lovullo.com/rater/apply-template",
+ URI_LV_WORKSHEET: uri "http://www.lovullo.com/rater/worksheet",
- // TODO: Whitespace type
- WS_EMPTY: str "",
+ // Common whitespace.
+ //
+ // _This does not represent all forms of whitespace!_
+ // Clearly,
+ // but it is worth emphasizing.
+ //
+ // The intent of these whitespace symbols is to provide a means to
+ // determine whether that symbol represents a common form of
+ // whitespace,
+ // before falling back to a more expensive symbol dereference
+ // and (likely-)linear scan.
+ //
+ // This list is preliminary and ought to be measured by evaluating a
+ // real-world codebase;
+ // it ought not to bloat the symbol table,
+ // but ought to get the most common cases so as not to fall
+ // back to a more expensive dereferencing of a symbol and
+ // subsequent scanning.
+ //
+ // There are improvements that can be made here,
+ // such as aligning the symbol ids such that whitespace can be
+ // asserted with a bitmask.
+ WS_SYM_START: mark "###WS_START",
+ WS_EMPTY: ws "",
+ WS_SP1: ws " ",
+ WS_SP2: ws " ",
+ WS_SP3: ws " ",
+ WS_SP4: ws " ",
+ WS_SP5: ws " ",
+ WS_SP6: ws " ",
+ WS_SP7: ws " ",
+ WS_SP8: ws " ",
+ WS_LF1: ws "\n",
+ WS_LF2: ws "\n\n",
+ WS_LF1_SP1: ws "\n ",
+ WS_LF1_SP2: ws "\n ",
+ WS_LF1_SP3: ws "\n ",
+ WS_LF1_SP4: ws "\n ",
+ WS_LF1_SP5: ws "\n ",
+ WS_LF1_SP6: ws "\n ",
+ WS_LF1_SP7: ws "\n ",
+ WS_LF1_SP8: ws "\n ",
+ WS_LF2_SP1: ws "\n\n ",
+ WS_LF2_SP2: ws "\n\n ",
+ WS_LF2_SP3: ws "\n\n ",
+ WS_LF2_SP4: ws "\n\n ",
+ WS_LF2_SP5: ws "\n\n ",
+ WS_LF2_SP6: ws "\n\n ",
+ WS_LF2_SP7: ws "\n\n ",
+ WS_LF2_SP8: ws "\n\n ",
+ WS_SYM_END: mark "###WS_END",
// [Symbols will be added here as they are needed.]
// Marker indicating the end of the static symbols
// (this must always be last).
- END_STATIC: mark "{{end}}"
+ END_STATIC: mark "###END"
}
}
@@ -542,7 +811,7 @@ pub mod st16 {
// Marker indicating the end of the static symbols
// (this must always be last).
- END_STATIC: mark16 "{{end}}"
+ END_STATIC: mark16 "###END"
}
}
@@ -640,4 +909,50 @@ mod test {
"st::ST_COUNT does not match the number of static symbols"
);
}
+
+    // [`quick_contains_byte`] is asking for trouble if it's not properly
+ // maintained.
+ // It is expected that its implementation is manually verified,
+ // and it is written in a way that is clear and unambiguous.
+ // With that said,
+ // this does some minor spot-checking.
+ #[test]
+ fn quick_contains_byte_verify() {
+ use super::super::GlobalSymbolResolve;
+ use memchr::memchr;
+ use st::quick_contains_byte;
+
+ // No static symbols will contain control characters.
+ assert_eq!(quick_contains_byte(st::L_TRUE.into(), 0x01), Some(false));
+
+ // But we don't know about dynamically-allocated ones.
+ assert_eq!(
+ quick_contains_byte("NOT A PREINTERNED SYM".into(), 0x01),
+ None
+ );
+
+ // We chose to explicitly keep certain characters out of the
+ // preinterned list.
+ // Let's verify that is the case by iterating through _all of the
+ // static interns_.
+ for sym_id in 1..=st::ST_COUNT {
+ let sym = unsafe { SymbolId::from_int_unchecked(sym_id as u32) };
+
+ // If you get an error in this block,
+ // that means that you have added a symbol that violates
+ // assumptions made in `quick_contains_byte`.
+ // Either that implementation needs changing and this test
+ // updated,
+ // or you need to not add that symbol to the static symbol
+ // list.
+ for ch in b'{'..=0x7F {
+ assert_eq!(
+ memchr(ch, sym.lookup_str().as_bytes()),
+ None,
+ "Pre-interned static symbol {sym:?} \
+ contains unexpected byte 0x{ch:X}"
+ );
+ }
+ }
+ }
}
diff --git a/tamer/src/sym/symbol.rs b/tamer/src/sym/symbol.rs
index 8c1b928..5f50c1b 100644
--- a/tamer/src/sym/symbol.rs
+++ b/tamer/src/sym/symbol.rs
@@ -22,7 +22,7 @@
//! See the [parent module](super) for more information.
use super::{DefaultInterner, Interner};
-use crate::global;
+use crate::{diagnostic_panic, global};
use std::convert::{TryFrom, TryInto};
use std::fmt::{Debug, Display};
use std::hash::Hash;
@@ -263,7 +263,8 @@ impl<Ix: SymbolIndexSize> GlobalSymbolResolve for SymbolId<Ix> {
// If the system is being used properly, this should never
// happen (we'd only look up symbols allocated through this
// interner).
- panic!(
+ diagnostic_panic!(
+ vec![], // no span information available
"failed to resolve SymbolId({}) using global \
interner of length {}",
self.0.into(),
diff --git a/tamer/src/xir.rs b/tamer/src/xir.rs
index 852257d..57f3ef8 100644
--- a/tamer/src/xir.rs
+++ b/tamer/src/xir.rs
@@ -33,6 +33,7 @@
//! as opposed to "start" and "end" as used in the XML specification.
//! TAMER uses a uniform terminology for all delimited data.
+use crate::fmt::DisplayWrapper;
use crate::span::{Span, SpanLenSize};
use crate::sym::{
st_as_sym, GlobalSymbolIntern, GlobalSymbolInternBytes, SymbolId,
@@ -51,6 +52,8 @@ pub use escape::{DefaultEscaper, Escaper};
use error::SpanlessError;
use st::qname::QNameCompatibleStaticSymbolId;
+use self::fmt::{CloseXmlEle, OpenXmlEle, XmlAttr, XmlAttrValueQuote};
+
pub mod attr;
pub mod flat;
pub mod fmt;
@@ -157,6 +160,15 @@ impl TryFrom<&str> for NCName {
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Prefix(NCName);
+impl Prefix {
+ /// Construct a constant [`Prefix`] from a static C-style symbol.
+ pub const fn st_cid<T: QNameCompatibleStaticSymbolId>(
+ prefix_sym: &T,
+ ) -> Self {
+ Self(NCName(st_as_sym(prefix_sym)))
+ }
+}
+
/// Local name portion of a [`QName`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct LocalPart(NCName);
@@ -217,49 +229,6 @@ impl Display for LocalPart {
}
}
-/// A sequence of one or more whitespace characters.
-///
-/// Whitespace here is expected to consist of `[ \n\t\r]`
-/// (where the first character in that class is a space).
-#[derive(Debug, Clone, Copy, PartialEq, Eq)]
-pub struct Whitespace(SymbolId);
-
-impl Deref for Whitespace {
- type Target = SymbolId;
-
- fn deref(&self) -> &Self::Target {
- &self.0
- }
-}
-
-impl TryFrom<&str> for Whitespace {
- type Error = SpanlessError;
-
- fn try_from(value: &str) -> Result<Self, Self::Error> {
- // We do not expect this to ever be a large value based on how we
- // use it.
- // If it is, well, someone's doing something they ought not to be
- // and we're not going to optimize for it.
- if !value.as_bytes().iter().all(u8::is_ascii_whitespace) {
- return Err(SpanlessError::NotWhitespace(value.into()));
- }
-
- Ok(Self(value.intern()))
- }
-}
-
-impl From<Whitespace> for SymbolId {
- fn from(ws: Whitespace) -> Self {
- ws.0
- }
-}
-
-impl Display for Whitespace {
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
- self.0.fmt(f)
- }
-}
-
/// A qualified name (namespace prefix and local name).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct QName(Option<Prefix>, LocalPart);
@@ -431,8 +400,7 @@ impl CloseSpan {
}
}
-/// Number of bytes of whitespace following an element name in
-/// [`EleSpan`].
+/// Number of bytes representing the name of the element.
pub type EleNameLen = SpanLenSize;
/// Spans associated with an element opening or closing tag.
@@ -632,11 +600,6 @@ pub enum Token {
/// already present,
/// not for producing new CData safely!
CData(SymbolId, Span),
-
- /// Similar to `Text`,
- /// but intended for use where only whitespace is allowed,
- /// such as alignment of attributes.
- Whitespace(Whitespace, Span),
}
impl Display for Token {
@@ -646,31 +609,34 @@ impl Display for Token {
// but the diagnostic system also quote source lines to provide
// the necessary context.
match self {
- Self::Open(qname, _) => write!(f, "`<{}>`", qname),
- Self::Close(Some(qname), _) => write!(f, "`</{}>`", qname),
+ Self::Open(qname, _) => OpenXmlEle::fmt(qname, f),
+ Self::Close(Some(qname), _) => CloseXmlEle::fmt(qname, f),
// Its context is contained within the Open,
// and hopefully any user-visible errors will display that instead.
Self::Close(None, _) => {
- write!(f, "`/>`")
- }
- Self::AttrName(qname, _) => {
- write!(f, "`@{}`", qname)
- }
- Self::AttrValue(attr_val, _) => {
- write!(f, "attribute value `{}`", attr_val)
+ write!(f, "/>")
}
+ Self::AttrName(qname, _) => XmlAttr::fmt(qname, f),
+ Self::AttrValue(attr_val, _) => XmlAttrValueQuote::fmt(attr_val, f),
Self::AttrValueFragment(attr_val, _) => {
- write!(f, "attribute value fragment `{}`", attr_val)
+ write!(
+ f,
+ "value fragment {}",
+ XmlAttrValueQuote::wrap(attr_val)
+ )
}
Self::Comment(..) => write!(f, "comment"),
Self::Text(..) => write!(f, "text"),
Self::CData(..) => write!(f, "CDATA"),
- Self::Whitespace(..) => write!(f, "whitespace"),
}
}
}
impl crate::parse::Token for Token {
+ fn ir_name() -> &'static str {
+ "XIR"
+ }
+
/// Retrieve the [`Span`] associated with a given [`Token`].
///
/// Every token has an associated span.
@@ -685,8 +651,7 @@ impl crate::parse::Token for Token {
| AttrValueFragment(_, span)
| Comment(_, span)
| Text(_, span)
- | CData(_, span)
- | Whitespace(_, span) => *span,
+ | CData(_, span) => *span,
}
}
}
@@ -828,29 +793,9 @@ pub mod test {
}
}
- #[test]
- fn whitespace() -> TestResult {
- assert_eq!(Whitespace::try_from(" ")?, " ".try_into()?);
- assert_eq!(Whitespace::try_from(" \t ")?, " \t ".try_into()?);
-
- assert_eq!(
- Whitespace::try_from("not ws!"),
- Err(SpanlessError::NotWhitespace("not ws!".into(),))
- );
-
- Ok(())
- }
-
- #[test]
- fn whitespace_as_text() -> TestResult {
- assert_eq!(" ".intern(), Whitespace::try_from(" ")?.into(),);
-
- Ok(())
- }
-
mod ele_span {
use super::*;
- use crate::span::DUMMY_CONTEXT as DC;
+ use crate::span::dummy::DUMMY_CONTEXT as DC;
#[test]
fn open_without_attrs() {
diff --git a/tamer/src/xir/attr.rs b/tamer/src/xir/attr.rs
index b7f4389..a080305 100644
--- a/tamer/src/xir/attr.rs
+++ b/tamer/src/xir/attr.rs
@@ -160,6 +160,12 @@ impl Attr {
}
impl Token for Attr {
+ fn ir_name() -> &'static str {
+ // This may be used by multiple things,
+ // but it's primarily used by XIRF.
+ "XIRF"
+ }
+
fn span(&self) -> Span {
match self {
Attr(.., attr_span) => attr_span.span(),
@@ -171,7 +177,18 @@ impl crate::parse::Object for Attr {}
impl Display for Attr {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
- write!(f, "`@{}=\"{}\"` at {}", self.0, self.1, self.2 .0)
+ // Do not display value since it can contain any information and
+ // mess up formatted output.
+ // If we wish to display that information in the future,
+ // then we ought to escape and elide it,
+ // but we must furthermore make sure that it makes sense in all
+ // contexts;
+ // many diagnostic messages today expect that outputting an
+ // attribute will output the name of that attribute and
+ // nothing more.
+ match self {
+ Self(key, _value, _) => write!(f, "@{key}"),
+ }
}
}
@@ -245,7 +262,7 @@ impl<const N: usize> From<[Attr; N]> for AttrList {
#[cfg(test)]
mod test {
- use crate::span::DUMMY_CONTEXT as DC;
+ use crate::span::dummy::DUMMY_CONTEXT as DC;
use super::*;
diff --git a/tamer/src/xir/attr/parse.rs b/tamer/src/xir/attr/parse.rs
index 85275d1..477e267 100644
--- a/tamer/src/xir/attr/parse.rs
+++ b/tamer/src/xir/attr/parse.rs
@@ -21,9 +21,10 @@
use crate::{
diagnose::{Annotate, AnnotatedSpan, Diagnostic},
+ fmt::{DisplayWrapper, Tt},
parse::{NoContext, ParseState, Token, Transition, TransitionResult},
span::Span,
- xir::{QName, Token as XirToken},
+ xir::{fmt::XmlAttr, QName, Token as XirToken},
};
use std::{error::Error, fmt::Display};
@@ -76,7 +77,7 @@ impl ParseState for AttrParseState {
}
#[inline]
- fn is_accepting(&self) -> bool {
+ fn is_accepting(&self, _: &Self::Context) -> bool {
*self == Self::Empty
}
}
@@ -92,9 +93,13 @@ impl Display for AttrParseState {
use AttrParseState::*;
match self {
- Empty => write!(f, "expecting an attribute"),
+ Empty => write!(f, "expecting an attribute name"),
Name(name, _) => {
- write!(f, "expecting an attribute value for {name}")
+ write!(
+ f,
+ "expecting an attribute value for {}",
+ Tt::<XmlAttr>::wrap(name),
+ )
}
}
}
@@ -150,16 +155,14 @@ mod test {
use crate::{
convert::ExpectInto,
parse::{ParseError, Parsed},
+ span::dummy::*,
sym::GlobalSymbolIntern,
xir::test::{close_empty, open},
};
- const S: Span = crate::span::DUMMY_SPAN;
- const S2: Span = S.offset_add(1).unwrap();
-
#[test]
fn dead_if_first_token_is_non_attr() {
- let tok = open("foo", S);
+ let tok = open("foo", S1);
let mut sut = AttrParseState::parse(vec![tok.clone()].into_iter());
@@ -176,7 +179,7 @@ mod test {
let attr = "attr".unwrap_into();
let val = "val".intern();
- let toks = [XirToken::AttrName(attr, S), XirToken::AttrValue(val, S2)]
+ let toks = [XirToken::AttrName(attr, S1), XirToken::AttrValue(val, S2)]
.into_iter();
let sut = AttrParseState::parse(toks);
@@ -184,7 +187,7 @@ mod test {
assert_eq!(
Ok(vec![
Parsed::Incomplete,
- Parsed::Object(Attr::new(attr, val, (S, S2))),
+ Parsed::Object(Attr::new(attr, val, (S1, S2))),
]),
sut.collect()
);
@@ -196,7 +199,7 @@ mod test {
let recover = "value".intern();
let toks = vec![
- XirToken::AttrName(attr, S),
+ XirToken::AttrName(attr, S1),
close_empty(S2),
XirToken::AttrValue(recover, S2),
];
@@ -211,7 +214,7 @@ mod test {
assert_eq!(
sut.next(),
Some(Err(ParseError::StateError(
- AttrParseError::AttrValueExpected(attr, S, close_empty(S2))
+ AttrParseError::AttrValueExpected(attr, S1, close_empty(S2))
)))
);
@@ -227,7 +230,7 @@ mod test {
// let's actually attempt a recovery.
assert_eq!(
sut.next(),
- Some(Ok(Parsed::Object(Attr::new(attr, recover, (S, S2))))),
+ Some(Ok(Parsed::Object(Attr::new(attr, recover, (S1, S2))))),
);
// Finally, we should now be in an accepting state.
diff --git a/tamer/src/xir/flat.rs b/tamer/src/xir/flat.rs
index e2773bc..18be9d0 100644
--- a/tamer/src/xir/flat.rs
+++ b/tamer/src/xir/flat.rs
@@ -27,12 +27,14 @@
//!
//! 1. All closing tags must correspond to a matching opening tag at the
//! same depth;
-//! 2. [`XirfToken`] exposes the [`Depth`] of each opening/closing tag;
+//! 2. [`XirfToken`] exposes the [`Depth`] of each node-related token;
//! 3. Attribute tokens are parsed into [`Attr`] objects;
//! 4. Documents must begin with an element and end with the closing of
//! that element;
//! 5. Parsing will fail if input ends before all elements have been
//! closed.
+//! 6. Text nodes may optionally be parsed into [`RefinedText`] to
+//! distinguish whitespace.
//!
//! XIRF lowering does not perform any dynamic memory allocation;
//! maximum element nesting depth is set statically depending on the needs
@@ -40,26 +42,43 @@
use super::{
attr::{Attr, AttrParseError, AttrParseState},
- CloseSpan, OpenSpan, QName, Token as XirToken, TokenStream, Whitespace,
+ reader::is_xml_whitespace_char,
+ CloseSpan, OpenSpan, QName, Token as XirToken, TokenStream,
};
use crate::{
diagnose::{Annotate, AnnotatedSpan, Diagnostic},
parse::{
- Context, Object, ParseState, ParsedResult, Token, Transition,
- TransitionResult,
+ ClosedParseState, Context, Object, ParseState, ParsedResult, Token,
+ Transition, TransitionResult,
},
span::Span,
- sym::SymbolId,
+ sym::{st::is_common_whitespace, GlobalSymbolResolve, SymbolId},
xir::EleSpan,
};
use arrayvec::ArrayVec;
-use std::{error::Error, fmt::Display};
+use std::{
+ error::Error,
+ fmt::{Debug, Display},
+ marker::PhantomData,
+};
/// Tag nesting depth
/// (`0` represents the root).
-#[derive(Debug, Clone, PartialEq, Eq)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Depth(pub usize);
+impl Depth {
+ /// Yield a new [`Depth`] representing the expected depth of children of
+ /// an element at the current depth.
+ ///
+ /// That description is probably more confusing than the method name.
+ pub fn child_depth(&self) -> Depth {
+ match self {
+ Depth(depth) => Depth(depth + 1),
+ }
+ }
+}
+
impl Display for Depth {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
Display::fmt(&self.0, f)
@@ -73,8 +92,13 @@ impl Display for Depth {
/// Other objects retain the same format as their underlying token,
/// but are still validated to ensure that they are well-formed and that
/// the XML is well-structured.
+///
+/// Each token representing a child node contains a numeric [`Depth`]
+/// indicating the nesting depth;
+/// this can be used by downstream parsers to avoid maintaining their
+/// own stack in certain cases.
#[derive(Debug, Clone, PartialEq, Eq)]
-pub enum XirfToken {
+pub enum XirfToken<T: TextType> {
/// Opening tag of an element.
Open(QName, OpenSpan, Depth),
@@ -94,12 +118,12 @@ pub enum XirfToken {
Attr(Attr),
/// Comment node.
- Comment(SymbolId, Span),
+ Comment(SymbolId, Span, Depth),
/// Character data as part of an element.
///
/// See also [`CData`](XirfToken::CData) variant.
- Text(SymbolId, Span),
+ Text(T, Depth),
/// CData node (`<![CDATA[...]]>`).
///
@@ -108,34 +132,32 @@ pub enum XirfToken {
/// This is intended for reading existing XML data where CData is
/// already present,
/// not for producing new CData safely!
- CData(SymbolId, Span),
-
- /// Similar to `Text`,
- /// but intended for use where only whitespace is allowed,
- /// such as alignment of attributes.
- Whitespace(Whitespace, Span),
+ CData(SymbolId, Span, Depth),
}
-impl Token for XirfToken {
+impl<T: TextType> Token for XirfToken<T> {
+ fn ir_name() -> &'static str {
+ "XIRF"
+ }
+
fn span(&self) -> Span {
use XirfToken::*;
match self {
Open(_, OpenSpan(span, _), _)
| Close(_, CloseSpan(span, _), _)
- | Comment(_, span)
- | Text(_, span)
- | CData(_, span)
- | Whitespace(_, span) => *span,
+ | Comment(_, span, _)
+ | CData(_, span, _) => *span,
+ Text(text, _) => text.span(),
Attr(attr) => attr.span(),
}
}
}
-impl Object for XirfToken {}
+impl<T: TextType> Object for XirfToken<T> {}
-impl Display for XirfToken {
+impl<T: TextType> Display for XirfToken<T> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
use XirfToken::*;
@@ -147,27 +169,123 @@ impl Display for XirfToken {
Display::fmt(&XirToken::Close(*oqname, *span), f)
}
Attr(attr) => Display::fmt(&attr, f),
- Comment(sym, span) => {
+ Comment(sym, span, _) => {
Display::fmt(&XirToken::Comment(*sym, *span), f)
}
- Text(sym, span) => Display::fmt(&XirToken::Text(*sym, *span), f),
- CData(sym, span) => Display::fmt(&XirToken::CData(*sym, *span), f),
- Whitespace(ws, span) => {
- Display::fmt(&XirToken::Whitespace(*ws, *span), f)
+ Text(text, _) => Display::fmt(text, f),
+ CData(sym, span, _) => {
+ Display::fmt(&XirToken::CData(*sym, *span), f)
}
}
}
}
-impl From<Attr> for XirfToken {
+impl<T: TextType> From<Attr> for XirfToken<T> {
fn from(attr: Attr) -> Self {
Self::Attr(attr)
}
}
+/// Token of an optionally refined [`Text`].
+///
+/// XIRF is configurable on the type of processing it performs on [`Text`],
+/// including the detection of [`Whitespace`].
+///
+/// See also [`RefinedText`].
+pub trait TextType = From<Text> + Token + Eq;
+
+#[derive(Debug, PartialEq, Eq, Clone)]
+pub struct Text(pub SymbolId, pub Span);
+
+impl Token for Text {
+ fn ir_name() -> &'static str {
+ "XIRF Text"
+ }
+
+ fn span(&self) -> Span {
+ match self {
+ Self(_, span) => *span,
+ }
+ }
+}
+
+impl Display for Text {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+ // TODO: We'll need care to output text so that it does not mess up
+ // formatted output.
+ // Further,
+ // text can be any arbitrary length,
+ // and so should probably be elided after a certain length.
+ write!(f, "text")
+ }
+}
+
+/// A sequence of one or more whitespace characters.
+///
+/// Whitespace here is expected to consist of `[ \n\t\r]`
+/// (where the first character in that class is a space).
+#[derive(Debug, PartialEq, Eq, Clone)]
+pub struct Whitespace(pub Text);
+
+impl Display for Whitespace {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+ // TODO: Escape output as necessary so that we can render the symbol
+ // string.
+ // See also `<Text as Display>::fmt` TODO.
+ write!(f, "whitespace")
+ }
+}
+
+/// Text that has been refined to a more descriptive form.
+///
+/// This type may be used as a [`TextType`] to instruct XIRF to detect
+/// [`Whitespace`].
+#[derive(Debug, PartialEq, Eq, Clone)]
+pub enum RefinedText {
+ /// Provided [`Text`] has been determined to be [`Whitespace`].
+ Whitespace(Whitespace),
+ /// Provided [`Text`] was not able to be refined into a more specific
+ /// type.
+ Unrefined(Text),
+}
+
+impl Token for RefinedText {
+ fn ir_name() -> &'static str {
+ "XIRF RefinedText"
+ }
+
+ fn span(&self) -> Span {
+ match self {
+ Self::Whitespace(Whitespace(text)) | Self::Unrefined(text) => {
+ text.span()
+ }
+ }
+ }
+}
+
+impl Display for RefinedText {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+ match self {
+ Self::Whitespace(ws) => Display::fmt(ws, f),
+ Self::Unrefined(text) => Display::fmt(text, f),
+ }
+ }
+}
+
+impl From<Text> for RefinedText {
+ fn from(text: Text) -> Self {
+ match text {
+ Text(sym, _) if is_whitespace(sym) => {
+ Self::Whitespace(Whitespace(text))
+ }
+ _ => Self::Unrefined(text),
+ }
+ }
+}
+
/// XIRF-compatible attribute parser.
pub trait FlatAttrParseState<const MAX_DEPTH: usize> =
- ParseState<Token = XirToken, Object = Attr>
+ ClosedParseState<Token = XirToken, Object = Attr>
where
Self: Default,
<Self as ParseState>::Error: Into<XirToXirfError>,
@@ -183,14 +301,14 @@ type ElementStack<const MAX_DEPTH: usize> = ArrayVec<(QName, Span), MAX_DEPTH>;
/// XIRF document parser state.
///
/// This parser is a pushdown automaton that parses a single XML document.
-#[derive(Debug, Default, PartialEq, Eq)]
-pub enum XirToXirf<const MAX_DEPTH: usize, SA = AttrParseState>
+#[derive(Debug, PartialEq, Eq)]
+pub enum XirToXirf<const MAX_DEPTH: usize, T, SA = AttrParseState>
where
SA: FlatAttrParseState<MAX_DEPTH>,
+ T: TextType,
{
/// Document parsing has not yet begun.
- #[default]
- PreRoot,
+ PreRoot(PhantomData<T>),
/// Parsing nodes.
NodeExpected,
/// Delegating to attribute parser.
@@ -199,15 +317,59 @@ where
Done,
}
+impl<const MAX_DEPTH: usize, T, SA> Default for XirToXirf<MAX_DEPTH, T, SA>
+where
+ SA: FlatAttrParseState<MAX_DEPTH>,
+ T: TextType,
+{
+ fn default() -> Self {
+ Self::PreRoot(PhantomData::default())
+ }
+}
+
pub type StateContext<const MAX_DEPTH: usize> =
Context<ElementStack<MAX_DEPTH>>;
-impl<const MAX_DEPTH: usize, SA> ParseState for XirToXirf<MAX_DEPTH, SA>
+/// Whether the given [`SymbolId`] is all whitespace according to
+/// [`is_xml_whitespace_char`].
+///
+/// This will first consult the pre-interned whitespace symbol list using
+/// [`is_common_whitespace`].
+/// If that check fails,
+/// it will resort to looking up the symbol and performing a linear scan
+/// of the string,
+/// terminating early if a non-whitespace character is found.
+///
+/// Note that the empty string is considered to be whitespace.
+#[inline]
+fn is_whitespace(sym: SymbolId) -> bool {
+ // See `sym::prefill`;
+ // this may require maintenance to keep the prefill list up-to-date
+ // with common whitespace symbols to avoid symbol lookups.
+ // This common check is purely a performance optimization.
+ is_common_whitespace(sym) || {
+ // If this is called often and is too expensive,
+ // it may be worth caching metadata about symbols,
+ // either for XIRF or globally.
+ // This requires multiple dereferences
+ // (for looking up the intern for the `SymbolId`,
+ // which may result in multiple (CPU) cache misses,
+ // but that would have to be profiled since the symbol may
+ // have just been interned and may be cached still)
+ // and then a linear scan of the associated `str`,
+ // though it will terminate as soon as it finds a non-whitespace
+ // character.
+ sym.lookup_str().chars().all(is_xml_whitespace_char)
+ }
+}
+
+impl<const MAX_DEPTH: usize, T, SA> ParseState for XirToXirf<MAX_DEPTH, T, SA>
where
SA: FlatAttrParseState<MAX_DEPTH>,
+ T: TextType,
{
type Token = XirToken;
- type Object = XirfToken;
+ type Object = XirfToken<T>;
type Error = XirToXirfError;
type Context = StateContext<MAX_DEPTH>;
@@ -220,14 +382,24 @@ where
match (self, tok) {
// Comments are permitted before and after the first root element.
- (st @ (PreRoot | Done), XirToken::Comment(sym, span)) => {
- Transition(st).ok(XirfToken::Comment(sym, span))
+ (st @ (PreRoot(_) | Done), XirToken::Comment(sym, span)) => {
+ let depth = Depth(stack.len());
+ Transition(st).ok(XirfToken::Comment(sym, span, depth))
}
- (PreRoot, tok @ XirToken::Open(..)) => Self::parse_node(tok, stack),
+ // Ignore whitespace before or after root.
+ (st @ (PreRoot(_) | Done), XirToken::Text(sym, _))
+ if is_whitespace(sym) =>
+ {
+ Transition(st).incomplete()
+ }
- (PreRoot, tok) => {
- Transition(PreRoot).err(XirToXirfError::RootOpenExpected(tok))
+ (PreRoot(_), tok @ XirToken::Open(..)) => {
+ Self::parse_node(tok, stack)
+ }
+
+ (st @ PreRoot(_), tok) => {
+ Transition(st).err(XirToXirfError::RootOpenExpected(tok))
}
(NodeExpected, tok) => Self::parse_node(tok, stack),
@@ -249,7 +421,7 @@ where
/// Intuitively,
/// this means that the parser must have encountered the closing tag
/// for the root element.
- fn is_accepting(&self) -> bool {
+ fn is_accepting(&self, _: &Self::Context) -> bool {
// TODO: It'd be nice if we could also return additional context to
// aid the user in diagnosing the problem,
// e.g. what element(s) still need closing.
@@ -257,25 +429,27 @@ where
}
}
-impl<const MAX_DEPTH: usize, SA> Display for XirToXirf<MAX_DEPTH, SA>
+impl<const MAX_DEPTH: usize, T, SA> Display for XirToXirf<MAX_DEPTH, T, SA>
where
SA: FlatAttrParseState<MAX_DEPTH>,
+ T: TextType,
{
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
use XirToXirf::*;
match self {
- PreRoot => write!(f, "expecting document root"),
+ PreRoot(_) => write!(f, "expecting document root"),
NodeExpected => write!(f, "expecting a node"),
AttrExpected(sa) => Display::fmt(sa, f),
- Done => write!(f, "done parsing"),
+ Done => write!(f, "done parsing document root"),
}
}
}
-impl<const MAX_DEPTH: usize, SA> XirToXirf<MAX_DEPTH, SA>
+impl<const MAX_DEPTH: usize, T, SA> XirToXirf<MAX_DEPTH, T, SA>
where
SA: FlatAttrParseState<MAX_DEPTH>,
+ T: TextType,
{
/// Parse a token while in a state expecting a node.
fn parse_node(
@@ -283,7 +457,8 @@ where
stack: &mut ElementStack<MAX_DEPTH>,
) -> TransitionResult<Self> {
use XirToXirf::{AttrExpected, Done, NodeExpected};
- use XirfToken::*;
+
+ let depth = Depth(stack.len());
match tok {
XirToken::Open(qname, span) if stack.len() == MAX_DEPTH => {
@@ -294,15 +469,11 @@ where
}
XirToken::Open(qname, span) => {
- let depth = stack.len();
stack.push((qname, span.tag_span()));
// Delegate to the attribute parser until it is complete.
- Transition(AttrExpected(SA::default())).ok(Open(
- qname,
- span,
- Depth(depth),
- ))
+ Transition(AttrExpected(SA::default()))
+ .ok(XirfToken::Open(qname, span, depth))
}
XirToken::Close(close_oqname, close_span) => {
@@ -321,16 +492,14 @@ where
}
// Final closing tag (for root node) completes the document.
- (..) if stack.len() == 0 => Transition(Done).ok(Close(
- close_oqname,
- close_span,
- Depth(0),
- )),
+ (..) if stack.len() == 0 => Transition(Done).ok(
+ XirfToken::Close(close_oqname, close_span, Depth(0)),
+ ),
(..) => {
let depth = stack.len();
- Transition(NodeExpected).ok(Close(
+ Transition(NodeExpected).ok(XirfToken::Close(
close_oqname,
close_span,
Depth(depth),
@@ -339,17 +508,14 @@ where
}
}
- XirToken::Comment(sym, span) => {
- Transition(NodeExpected).ok(Comment(sym, span))
- }
- XirToken::Text(sym, span) => {
- Transition(NodeExpected).ok(Text(sym, span))
- }
+ XirToken::Comment(sym, span) => Transition(NodeExpected)
+ .ok(XirfToken::Comment(sym, span, depth)),
+
+ XirToken::Text(sym, span) => Transition(NodeExpected)
+ .ok(XirfToken::Text(T::from(Text(sym, span)), depth)),
+
XirToken::CData(sym, span) => {
- Transition(NodeExpected).ok(CData(sym, span))
- }
- XirToken::Whitespace(ws, span) => {
- Transition(NodeExpected).ok(Whitespace(ws, span))
+ Transition(NodeExpected).ok(XirfToken::CData(sym, span, depth))
}
// We should transition to `State::Attr` before encountering any
@@ -365,10 +531,10 @@ where
/// Produce a streaming parser lowering a XIR [`TokenStream`] into a XIRF
/// stream.
-pub fn parse<const MAX_DEPTH: usize>(
+pub fn parse<const MAX_DEPTH: usize, T: TextType>(
toks: impl TokenStream,
-) -> impl Iterator<Item = ParsedResult<XirToXirf<MAX_DEPTH>>> {
- XirToXirf::<MAX_DEPTH>::parse(toks)
+) -> impl Iterator<Item = ParsedResult<XirToXirf<MAX_DEPTH, T>>> {
+ XirToXirf::<MAX_DEPTH, T>::parse(toks)
}
/// Parsing error from [`XirToXirf`].
diff --git a/tamer/src/xir/flat/test.rs b/tamer/src/xir/flat/test.rs
index c315078..d520851 100644
--- a/tamer/src/xir/flat/test.rs
+++ b/tamer/src/xir/flat/test.rs
@@ -26,8 +26,8 @@ use std::assert_matches::assert_matches;
use super::*;
use crate::convert::ExpectInto;
-use crate::parse::{ParseError, Parsed};
-use crate::span::DUMMY_SPAN;
+use crate::parse::{FinalizeError, ParseError, Parsed};
+use crate::span::dummy::*;
use crate::sym::GlobalSymbolIntern;
use crate::xir::test::{
close as xir_close, close_empty as xir_close_empty, open as xir_open,
@@ -38,11 +38,11 @@ use std::fmt::Debug;
///
/// This function is not suitable for production use as it does not produce
/// a complete [`OpenSpan`].
-pub fn open<Q: TryInto<QName>, S: Into<OpenSpan>>(
+pub fn open<Q: TryInto<QName>, S: Into<OpenSpan>, T: TextType>(
qname: Q,
span: S,
depth: Depth,
-) -> XirfToken
+) -> XirfToken<T>
where
<Q as TryInto<QName>>::Error: Debug,
{
@@ -56,7 +56,10 @@ where
///
/// This function is not suitable for production use as it does not produce
/// a complete [`OpenSpan`].
-pub fn close_empty<S: Into<CloseSpan>>(span: S, depth: Depth) -> XirfToken {
+pub fn close_empty<S: Into<CloseSpan>, T: TextType>(
+ span: S,
+ depth: Depth,
+) -> XirfToken<T> {
XirfToken::Close(None, span.into(), depth)
}
@@ -66,33 +69,28 @@ pub fn close_empty<S: Into<CloseSpan>>(span: S, depth: Depth) -> XirfToken {
///
/// This function is not suitable for production use as it does not produce
/// a complete [`OpenSpan`].
-pub fn close<Q: TryInto<QName>, S: Into<CloseSpan>>(
+pub fn close<Q: TryInto<QName>, S: Into<CloseSpan>, T: TextType>(
qname: Option<Q>,
span: S,
depth: Depth,
-) -> XirfToken
+) -> XirfToken<T>
where
<Q as TryInto<QName>>::Error: Debug,
{
XirfToken::Close(qname.map(ExpectInto::unwrap_into), span.into(), depth)
}
-const S: Span = DUMMY_SPAN;
-const S2: Span = S.offset_add(1).unwrap();
-const S3: Span = S2.offset_add(1).unwrap();
-const S4: Span = S3.offset_add(1).unwrap();
-
#[test]
fn empty_element_self_close() {
let name = ("ns", "elem");
- let toks = [xir_open(name, S), xir_close_empty(S2)].into_iter();
+ let toks = [xir_open(name, S1), xir_close_empty(S2)].into_iter();
- let sut = parse::<1>(toks);
+ let sut = parse::<1, Text>(toks);
assert_eq!(
Ok(vec![
- Parsed::Object(open(name, S, Depth(0))),
+ Parsed::Object(open(name, S1, Depth(0))),
Parsed::Object(close_empty(S2, Depth(0))),
]),
sut.collect(),
@@ -105,13 +103,13 @@ fn empty_element_self_close() {
fn empty_element_balanced_close() {
let name = ("ns", "openclose");
- let toks = [xir_open(name, S), xir_close(Some(name), S2)].into_iter();
+ let toks = [xir_open(name, S1), xir_close(Some(name), S2)].into_iter();
- let sut = parse::<1>(toks);
+ let sut = parse::<1, Text>(toks);
assert_eq!(
Ok(vec![
- Parsed::Object(open(name, S, Depth(0))),
+ Parsed::Object(open(name, S1, Depth(0))),
Parsed::Object(close(Some(name), S2, Depth(0))),
]),
sut.collect(),
@@ -127,16 +125,16 @@ fn extra_closing_tag() {
let name = ("ns", "openclose");
let toks = [
// We need an opening tag to actually begin document parsing.
- xir_open(name, S),
+ xir_open(name, S1),
xir_close(Some(name), S2),
xir_close(Some(name), S3),
]
.into_iter();
- let sut = parse::<1>(toks);
+ let sut = parse::<1, Text>(toks);
assert_matches!(
- sut.collect::<Result<Vec<Parsed<XirfToken>>, _>>(),
+ sut.collect::<Result<Vec<Parsed<_>>, _>>(),
Err(ParseError::UnexpectedToken(
XirToken::Close(Some(given_name), given_span),
_
@@ -152,16 +150,16 @@ fn extra_self_closing_tag() {
let name = ("ns", "openclose");
let toks = [
// We need an opening tag to actually begin document parsing.
- xir_open(name, S),
+ xir_open(name, S1),
xir_close_empty(S2),
xir_close_empty(S3),
]
.into_iter();
- let sut = parse::<1>(toks);
+ let sut = parse::<1, Text>(toks);
assert_matches!(
- sut.collect::<Result<Vec<Parsed<XirfToken>>, _>>(),
+ sut.collect::<Result<Vec<Parsed<_>>, _>>(),
Err(ParseError::UnexpectedToken(XirToken::Close(None, given_span), _))
if given_span == S3.into(),
);
@@ -175,18 +173,18 @@ fn empty_element_unbalanced_close() {
let close_name = "unbalanced_name".unwrap_into();
let toks =
- [xir_open(open_name, S), xir_close(Some(close_name), S2)].into_iter();
+ [xir_open(open_name, S1), xir_close(Some(close_name), S2)].into_iter();
- let mut sut = parse::<1>(toks);
+ let mut sut = parse::<1, Text>(toks);
assert_eq!(
sut.next(),
- Some(Ok(Parsed::Object(open(open_name, S, Depth(0)))))
+ Some(Ok(Parsed::Object(open(open_name, S1, Depth(0)))))
);
assert_eq!(
sut.next(),
Some(Err(ParseError::StateError(XirToXirfError::UnbalancedTag {
- open: (open_name, S),
+ open: (open_name, S1),
close: (close_name, S2),
})))
);
@@ -199,18 +197,18 @@ fn single_empty_child() {
let child = "child";
let toks = [
- xir_open(name, S),
+ xir_open(name, S1),
xir_open(child, S2),
xir_close_empty(S3),
xir_close(Some(name), S4),
]
.into_iter();
- let sut = parse::<2>(toks);
+ let sut = parse::<2, Text>(toks);
assert_eq!(
Ok(vec![
- Parsed::Object(open(name, S, Depth(0))),
+ Parsed::Object(open(name, S1, Depth(0))),
Parsed::Object(open(child, S2, Depth(1))),
Parsed::Object(close_empty(S3, Depth(1))),
Parsed::Object(close(Some(name), S4, Depth(0))),
@@ -225,17 +223,17 @@ fn depth_exceeded() {
let exceed = "exceed".unwrap_into();
let toks = [
- xir_open(name, S),
+ xir_open(name, S1),
// This one exceeds the max depth, ...
xir_open(exceed, S2),
]
.into_iter();
// ...which is set here: MAX_DEPTH here is 1
- let mut sut = parse::<1>(toks);
+ let mut sut = parse::<1, Text>(toks);
assert_eq!(
- Some(Ok(Parsed::Object(open(name, S, Depth(0))))),
+ Some(Ok(Parsed::Object(open(name, S1, Depth(0))))),
sut.next()
);
assert_eq!(
@@ -258,7 +256,7 @@ fn empty_element_with_attrs() {
let val2 = "val2".intern();
let toks = [
- xir_open(name, S),
+ xir_open(name, S1),
XirToken::AttrName(attr1, S2),
XirToken::AttrValue(val1, S3),
XirToken::AttrName(attr2, S3),
@@ -267,11 +265,11 @@ fn empty_element_with_attrs() {
]
.into_iter();
- let sut = parse::<2>(toks);
+ let sut = parse::<2, Text>(toks);
assert_eq!(
Ok(vec![
- Parsed::Object(open(name, S, Depth(0))),
+ Parsed::Object(open(name, S1, Depth(0))),
Parsed::Incomplete,
Parsed::Object(XirfToken::Attr(Attr::new(attr1, val1, (S2, S3)))),
Parsed::Incomplete,
@@ -290,23 +288,23 @@ fn child_element_after_attrs() {
let val = "val".intern();
let toks = [
- xir_open(name, S),
- XirToken::AttrName(attr, S),
+ xir_open(name, S1),
+ XirToken::AttrName(attr, S1),
XirToken::AttrValue(val, S2),
- xir_open(child, S),
+ xir_open(child, S1),
xir_close_empty(S2),
xir_close(Some(name), S3),
]
.into_iter();
- let sut = parse::<2>(toks);
+ let sut = parse::<2, Text>(toks);
assert_eq!(
Ok(vec![
- Parsed::Object(open(name, S, Depth(0))),
+ Parsed::Object(open(name, S1, Depth(0))),
Parsed::Incomplete,
- Parsed::Object(XirfToken::Attr(Attr::new(attr, val, (S, S2)))),
- Parsed::Object(open(child, S, Depth(1))),
+ Parsed::Object(XirfToken::Attr(Attr::new(attr, val, (S1, S2)))),
+ Parsed::Object(open(child, S1, Depth(1))),
Parsed::Object(close_empty(S2, Depth(1))),
Parsed::Object(close(Some(name), S3, Depth(0))),
]),
@@ -321,7 +319,7 @@ fn element_with_empty_sibling_children() {
let childb = "childb";
let toks = [
- xir_open(parent, S),
+ xir_open(parent, S1),
xir_open(childa, S2),
xir_close_empty(S3),
xir_open(childb, S2),
@@ -330,11 +328,11 @@ fn element_with_empty_sibling_children() {
]
.into_iter();
- let sut = parse::<2>(toks);
+ let sut = parse::<2, Text>(toks);
assert_eq!(
Ok(vec![
- Parsed::Object(open(parent, S, Depth(0))),
+ Parsed::Object(open(parent, S1, Depth(0))),
Parsed::Object(open(childa, S2, Depth(1))),
Parsed::Object(close_empty(S3, Depth(1))),
Parsed::Object(open(childb, S2, Depth(1))),
@@ -354,23 +352,23 @@ fn element_with_child_with_attributes() {
let value = "attr value".intern();
let toks = [
- xir_open(parent, S),
- xir_open(child, S),
- XirToken::AttrName(attr, S),
+ xir_open(parent, S1),
+ xir_open(child, S1),
+ XirToken::AttrName(attr, S1),
XirToken::AttrValue(value, S2),
xir_close_empty(S3),
xir_close(Some(parent), S3),
]
.into_iter();
- let sut = parse::<2>(toks);
+ let sut = parse::<2, Text>(toks);
assert_eq!(
Ok(vec![
- Parsed::Object(open(parent, S, Depth(0))),
- Parsed::Object(open(child, S, Depth(1))),
+ Parsed::Object(open(parent, S1, Depth(0))),
+ Parsed::Object(open(child, S1, Depth(1))),
Parsed::Incomplete,
- Parsed::Object(XirfToken::Attr(Attr::new(attr, value, (S, S2)))),
+ Parsed::Object(XirfToken::Attr(Attr::new(attr, value, (S1, S2)))),
Parsed::Object(close_empty(S3, Depth(1))),
Parsed::Object(close(Some(parent), S3, Depth(0))),
]),
@@ -384,18 +382,18 @@ fn element_with_text() {
let text = "inner text".into();
let toks = [
- xir_open(parent, S),
+ xir_open(parent, S1),
XirToken::Text(text, S2),
xir_close(Some(parent), S3),
]
.into_iter();
- let sut = parse::<1>(toks);
+ let sut = parse::<1, Text>(toks);
assert_eq!(
Ok(vec![
- Parsed::Object(open(parent, S, Depth(0))),
- Parsed::Object(XirfToken::Text(text, S2)),
+ Parsed::Object(open(parent, S1, Depth(0))),
+ Parsed::Object(XirfToken::Text(Text(text, S2), Depth(1))),
Parsed::Object(close(Some(parent), S3, Depth(0))),
]),
sut.collect(),
@@ -405,17 +403,22 @@ fn element_with_text() {
#[test]
fn not_accepting_state_if_element_open() {
let name = "unclosed";
- let toks = [xir_open(name, S)].into_iter();
+ let toks = [xir_open(name, S1)].into_iter();
- let mut sut = parse::<1>(toks);
+ let mut sut = parse::<1, Text>(toks);
assert_eq!(
- Some(Ok(Parsed::Object(open(name, S, Depth(0))))),
+ Some(Ok(Parsed::Object(open(name, S1, Depth(0))))),
sut.next()
);
// Element was not closed.
- assert_matches!(sut.next(), Some(Err(ParseError::UnexpectedEof(..))));
+ assert_matches!(
+ sut.next(),
+ Some(Err(ParseError::FinalizeError(
+ FinalizeError::UnexpectedEof(..)
+ )))
+ );
}
// XML permits comment nodes before and after the document root element.
@@ -426,21 +429,49 @@ fn comment_before_or_after_root_ok() {
let cend = "end comment".intern();
let toks = [
- XirToken::Comment(cstart, S),
+ XirToken::Comment(cstart, S1),
xir_open(name, S2),
xir_close_empty(S3),
XirToken::Comment(cend, S4),
]
.into_iter();
- let sut = parse::<1>(toks);
+ let sut = parse::<1, Text>(toks);
assert_eq!(
Ok(vec![
- Parsed::Object(XirfToken::Comment(cstart, S)),
+ Parsed::Object(XirfToken::Comment(cstart, S1, Depth(0))),
Parsed::Object(open(name, S2, Depth(0))),
Parsed::Object(close_empty(S3, Depth(0))),
- Parsed::Object(XirfToken::Comment(cend, S4)),
+ Parsed::Object(XirfToken::Comment(cend, S4, Depth(0))),
+ ]),
+ sut.collect(),
+ );
+}
+
+// Similar to above,
+// but with whitespace.
+#[test]
+fn whitespace_before_or_after_root_ok() {
+ let name = "root";
+ let ws = " ".unwrap_into();
+
+ let toks = [
+ XirToken::Text(ws, S1),
+ xir_open(name, S2),
+ xir_close_empty(S3),
+ XirToken::Text(ws, S4),
+ ]
+ .into_iter();
+
+ let sut = parse::<1, RefinedText>(toks);
+
+ assert_eq!(
+ Ok(vec![
+ Parsed::Incomplete,
+ Parsed::Object(open(name, S2, Depth(0))),
+ Parsed::Object(close_empty(S3, Depth(0))),
+ Parsed::Incomplete,
]),
sut.collect(),
);
@@ -459,18 +490,18 @@ fn content_after_root_close_error() {
let name = "root".unwrap_into();
let toks = [
- xir_open(name, S),
+ xir_open(name, S1),
xir_close_empty(S2),
// Document ends here
xir_open(name, S3),
]
.into_iter();
- let sut = parse::<1>(toks);
+ let sut = parse::<1, Text>(toks);
assert_matches!(
sut.collect(),
- Result::<Vec<Parsed<XirfToken>>, _>::Err(ParseError::UnexpectedToken(
+ Result::<Vec<Parsed<_>>, _>::Err(ParseError::UnexpectedToken(
XirToken::Open(given_name, given_span),
_)) if given_name == name && given_span == S3.into()
);
@@ -481,14 +512,59 @@ fn content_after_root_close_error() {
fn content_before_root_open_error() {
let text = "foo".intern();
- let toks = [XirToken::Text(text, S)].into_iter();
+ let toks = [XirToken::Text(text, S1)].into_iter();
- let sut = parse::<1>(toks);
+ let sut = parse::<1, Text>(toks);
assert_eq!(
- Result::<Vec<Parsed<XirfToken>>, _>::Err(ParseError::StateError(
- XirToXirfError::RootOpenExpected(XirToken::Text(text, S))
+ Result::<Vec<Parsed<_>>, _>::Err(ParseError::StateError(
+ XirToXirfError::RootOpenExpected(XirToken::Text(text, S1))
)),
sut.collect()
);
}
+
+#[test]
+fn whitespace_refinement() {
+ // Nothing exhaustive;
+ // just check some notable examples.
+ vec![
+ ("".into(), true),
+ (" ".into(), true),
+ ("\n".into(), true),
+ ("\n\n\t ".into(), true),
+ (" foo ".into(), false),
+ ("\n .".into(), false),
+ (".\n ".into(), false),
+ ]
+ .into_iter()
+ .for_each(|(given, expected)| {
+ let mut sut = parse::<1, RefinedText>(
+ vec![xir_open("root", S1), XirToken::Text(given, S1)].into_iter(),
+ );
+
+ let _ = sut.next(); // discard root
+
+ match sut.next().unwrap().unwrap() {
+ Parsed::Object(XirfToken::Text(
+ RefinedText::Whitespace(Whitespace(Text(ws, span))),
+ Depth(1),
+ )) => {
+ assert_eq!(ws, given);
+ assert_eq!(span, S1);
+ assert!(expected == true)
+ }
+
+ Parsed::Object(XirfToken::Text(
+ RefinedText::Unrefined(Text(text, span)),
+ Depth(1),
+ )) => {
+ assert_eq!(text, given);
+ assert_eq!(span, S1);
+ assert!(expected == false)
+ }
+
+ unexpected => panic!("unexpected token: {unexpected:?}"),
+ }
+ });
+}
diff --git a/tamer/src/xir/fmt.rs b/tamer/src/xir/fmt.rs
index 620b775..5bbf7d7 100644
--- a/tamer/src/xir/fmt.rs
+++ b/tamer/src/xir/fmt.rs
@@ -19,17 +19,42 @@
//! XIR formatting types for use with [`crate::fmt`]
-use crate::fmt::{AndQualConjList, Delim, Prefix, Raw, Tt};
+use crate::fmt::{
+ AndQualConjList, Delim, OrQualConjList, Prefix, Raw, Suffix, Tt, TtQuote,
+};
/// Denote an XML attribute by prefixing the value with `@`.
pub type XmlAttr = Prefix<"@", Raw>;
+/// [`XmlAttr`] formatted as teletypewriter
+/// (for use in sentences).
+pub type TtXmlAttr = Tt<XmlAttr>;
+
/// A list of XML attributes [`Tt`]-quoted.
pub type XmlAttrList = AndQualConjList<"attribute", "attributes", Tt<XmlAttr>>;
/// Opening tag for XML element.
pub type OpenXmlEle = Delim<"<", ">", Raw>;
+/// Opening tag for XML element.
+pub type CloseXmlEle = Delim<"</", ">", Raw>;
+
+/// "`ns:*`" given a namespace prefix `ns`.
+///
+/// TODO: It'd be nice to be able to have Raw require a specific type to
+/// ensure that we're given a prefix.
+pub type XmlPrefixAnyLocal = Suffix<":*", Raw>;
+
/// Opening tag for XML element as teletypewriter
/// (for use in sentences).
pub type TtOpenXmlEle = Tt<OpenXmlEle>;
+
+/// Closing tag for XML element as teletypewriter
+/// (for use in sentences).
+pub type TtCloseXmlEle = Tt<CloseXmlEle>;
+
+/// A choice of a list of XML elements by name.
+pub type EleSumList = OrQualConjList<"element", "one of elements", TtQuote>;
+
+/// Quote an attribute value using double quotes.
+pub type XmlAttrValueQuote = Delim<"\"", "\"", Raw>;
diff --git a/tamer/src/xir/iter.rs b/tamer/src/xir/iter.rs
index 30452d3..cbc7dd6 100644
--- a/tamer/src/xir/iter.rs
+++ b/tamer/src/xir/iter.rs
@@ -86,7 +86,7 @@ impl<I: TokenStream> Iterator for ElemWrapIter<I> {
#[cfg(test)]
mod test {
use super::*;
- use crate::{convert::ExpectInto, span::DUMMY_SPAN, xir::Token};
+ use crate::{convert::ExpectInto, span::dummy::DUMMY_SPAN, xir::Token};
#[test]
fn elem_wrap_iter() {
diff --git a/tamer/src/xir/parse.rs b/tamer/src/xir/parse.rs
index ff3b8f0..f0b4737 100644
--- a/tamer/src/xir/parse.rs
+++ b/tamer/src/xir/parse.rs
@@ -24,5 +24,11 @@
mod attr;
mod ele;
+mod error;
-pub use attr::{AttrParseError, AttrParseState};
+pub use attr::{parse_attrs, AttrParseState};
+pub use ele::{
+ AttrFieldOp, AttrFieldSum, EleParseState, NodeMatcher, Nt, NtState,
+ StateStack, SumNt, SumNtState, SuperCtx, SuperState, SuperStateContext,
+};
+pub use error::{AttrParseError, NtError, SumNtError};
diff --git a/tamer/src/xir/parse/attr.rs b/tamer/src/xir/parse/attr.rs
index 6a0bc10..b13287c 100644
--- a/tamer/src/xir/parse/attr.rs
+++ b/tamer/src/xir/parse/attr.rs
@@ -37,95 +37,31 @@
//! The parser automatically produces detailed error and diagnostic
//! messages.
+use super::AttrParseError;
use crate::{
- diagnose::{Annotate, AnnotatedSpan, Diagnostic},
- fmt::ListDisplayWrapper,
- parse::ParseState,
- xir::{attr::Attr, fmt::XmlAttrList, EleSpan, OpenSpan, QName},
+ diagnose::Diagnostic,
+ parse::{ClosedParseState, ParseState},
+ xir::{OpenSpan, QName},
};
-use std::{error::Error, fmt::Display};
-
-pub type ElementQName = QName;
-
-/// Error while parsing element attributes.
-#[derive(Debug, PartialEq)]
-pub enum AttrParseError<S: AttrParseState> {
- /// One or more required attributes are missing.
- ///
- /// Since required attributes are not checked until parsing is complete,
- /// and that determination requires a token of lookahead,
- /// this error produces a lookahead token that must be handled by the
- /// caller.
- ///
- /// This also provices the actual [`AttrParseState`],
- /// which can be used to retrieve the missing required attributes
- /// (using [`AttrParseState::required_missing`]),
- /// can be used to retrieve information about the attributes that
- /// _have_ been successfully parsed,
- /// and can be used to resume parsing if desired.
- ///
- /// The caller must determine whether to proceed with parsing of the
- /// element despite these problems;
- /// such recovery is beyond the scope of this parser.
- MissingRequired(S),
-
- /// An attribute was encountered that was not expected by this parser.
- ///
- /// Parsing may recover by simply ignoring this attribute.
- UnexpectedAttr(Attr, ElementQName),
-}
-
-impl<S: AttrParseState> Display for AttrParseError<S> {
- fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
- match self {
- Self::MissingRequired(st) => {
- let ele_name = st.element_name();
- write!(f, "element `{ele_name}` missing required ")?;
-
- XmlAttrList::fmt(&st.required_missing(), f)
- }
-
- Self::UnexpectedAttr(attr, ele_name) => {
- write!(
- f,
- "element `{ele_name}` contains unexpected attribute `{attr}`"
- )
- }
- }
- }
-}
-
-impl<S: AttrParseState> Error for AttrParseError<S> {
- fn source(&self) -> Option<&(dyn Error + 'static)> {
- None
- }
-}
-
-impl<S: AttrParseState> Diagnostic for AttrParseError<S> {
- fn describe(&self) -> Vec<AnnotatedSpan> {
- match self {
- Self::MissingRequired(st) => st
- .element_span()
- .tag_span()
- .error(format!(
- "missing required {}",
- XmlAttrList::wrap(&st.required_missing()),
- ))
- .into(),
-
- // TODO: help stating attributes that can appear instead
- Self::UnexpectedAttr(attr @ Attr(.., aspan), ele_name) => aspan
- .key_span()
- .error(format!("element `{ele_name}` cannot contain `{attr}`"))
- .into(),
- }
- }
-}
+use std::{convert::Infallible, fmt::Debug};
/// Attribute parsing automaton.
///
/// These parsers are generated by [`attr_parse!`](crate::attr_parse).
-pub trait AttrParseState: ParseState {
+pub trait AttrParseState: ClosedParseState {
+ /// Type of error for failed parsing of attribute values.
+ ///
+ /// These originate from [`TryFrom`] conversions on the attribute
+ /// value.
+ /// The default is [`Infallible`],
+ /// meaning such conversion cannot fail and [`From`] may be used in
+ /// place of [`TryFrom`].
+ type ValueError: Diagnostic + PartialEq = Infallible;
+
+ /// Object holding the current state of field aggregation,
+ /// before the yield of the final object.
+ type Fields: Debug + PartialEq + Eq;
+
/// Begin attribute parsing within the context of the provided element.
///
/// This is used to provide diagnostic information.
@@ -149,10 +85,13 @@ pub trait AttrParseState: ParseState {
/// are missing.
/// The list of missing fields is generated dynamically during
/// diagnostic reporting.
- fn finalize_attr(self) -> Result<Self::Object, AttrParseError<Self>>;
+ fn finalize_attr(
+ self,
+ ctx: &mut <Self as ParseState>::Context,
+ ) -> Result<Self::Object, AttrParseError<Self>>;
/// Names of attributes that are required but do not yet have a value.
- fn required_missing(&self) -> Vec<QName>;
+ fn required_missing(&self, ctx: &Self::Fields) -> Vec<QName>;
}
/// Parse attributes for the given element.
@@ -160,7 +99,6 @@ pub trait AttrParseState: ParseState {
/// This function is useful when the type of [`AttrParseState`] `S` can be
/// inferred,
/// so that the expression reads more like natural language.
-#[cfg(test)] // currently only used by tests; remove when ready
pub fn parse_attrs<S: AttrParseState>(ele: QName, span: OpenSpan) -> S {
S::with_element(ele, span)
}
@@ -168,21 +106,26 @@ pub fn parse_attrs<S: AttrParseState>(ele: QName, span: OpenSpan) -> S {
#[macro_export]
macro_rules! attr_parse {
($(#[$sattr:meta])*
- $vis:vis struct $state_name:ident -> $struct_name:ident {
+ $(vis($vis:vis);)?
+ $(type ValueError = $evty:ty;)?
+
+ struct $(#[$st_attr:meta])? $state_name:ident -> $struct_name:ident {
$(
$(#[$fattr:meta])*
$field:ident: ($qname:ident $($fmod:tt)?) => $ty:ty,
)*
}
- ) => {
+ ) => { paste::paste! {
$(
// This provides a nice error on $ty itself at the call site,
// rather than relying on `Into::into` to cause the error
// later on,
// which places the error inside the macro definition.
- assert_impl_all!($ty: From<crate::xir::attr::Attr>);
+ $crate::attr_parse!(@ty_assert $($fmod)? $ty);
)*
+ $(#[$st_attr])?
+ ///
#[doc=concat!("Parser producing [`", stringify!($struct_name), "`].")]
///
/// Unlike the final type,
@@ -192,56 +135,72 @@ macro_rules! attr_parse {
#[doc=concat!("[`", stringify!($struct_name), "`].")]
///
/// This object is exposed for recovery and error reporting on
- /// [`AttrParseError::MissingRequired`].
+ /// [`AttrParseError::MissingRequired`][MissingRequired].
+ ///
+ /// [MissingRequired]: crate::xir::parse::AttrParseError::MissingRequired
+ // TODO: This can be extracted out of the macro.
#[derive(Debug, PartialEq, Eq)]
- $vis struct $state_name {
- #[doc(hidden)]
- ___ctx: (crate::xir::QName, crate::xir::OpenSpan),
- #[doc(hidden)]
- ___done: bool,
+ $($vis)? enum $state_name {
+ Parsing(crate::xir::QName, crate::xir::OpenSpan),
+ Done(crate::xir::QName, crate::xir::OpenSpan),
+ }
+
+ #[doc(hidden)]
+ #[allow(non_camel_case_types)]
+ $($vis)? type [<$state_name Context>] =
+ crate::parse::Context<[<$state_name Fields>]>;
+
+ /// Intermediate state of parser as fields are aggregated.
+ #[allow(non_camel_case_types)]
+ #[derive(Debug, PartialEq, Eq, Default)]
+ $($vis)? struct [<$state_name Fields>] {
$(
- pub $field: Option<$ty>,
+ // Value + key span
+ pub $field: Option<($ty, crate::span::Span)>,
)*
}
impl crate::xir::parse::AttrParseState for $state_name {
+ type ValueError = $crate::attr_parse!(@evty $($evty)?);
+ type Fields = [<$state_name Fields>];
+
fn with_element(
ele: crate::xir::QName,
span: crate::xir::OpenSpan
) -> Self {
- Self {
- ___ctx: (ele, span),
- ___done: false,
- $(
- $field: None,
- )*
- }
+ Self::Parsing(ele, span)
}
fn element_name(&self) -> crate::xir::QName {
- match self.___ctx {
- (name, _) => name,
+ match self {
+ Self::Parsing(qname, _) | Self::Done(qname, _) => *qname,
}
}
fn element_span(&self) -> crate::xir::OpenSpan {
- match self.___ctx {
- (_, span) => span,
+ match self {
+ Self::Parsing(_, span) | Self::Done(_, span) => *span,
}
}
fn finalize_attr(
self,
+ ctx: &mut <Self as crate::parse::ParseState>::Context,
) -> Result<
Self::Object,
crate::xir::parse::AttrParseError<Self>,
> {
+ // Will be unused if there are no fields.
+ #[allow(unused_variables)]
+ let fields: Self::Fields = std::mem::take(ctx);
+
// Validate required fields before we start moving data.
$(
- $crate::attr_parse!(@if_missing_req $($fmod)? self.$field {
+ $crate::attr_parse!(@if_missing_req $($fmod)? fields.$field {
return Err(
crate::xir::parse::AttrParseError::MissingRequired(
self,
+ fields,
)
)
});
@@ -250,7 +209,7 @@ macro_rules! attr_parse {
let obj = $struct_name {
$(
$field: $crate::attr_parse!(
- @maybe_value $($fmod)? self.$field
+ @maybe_value $($fmod)? fields.$field
),
)*
};
@@ -258,12 +217,16 @@ macro_rules! attr_parse {
Ok(obj)
}
- fn required_missing(&self) -> Vec<crate::xir::QName> {
+ fn required_missing(
+ &self,
+ #[allow(unused_variables)] // unused if no fields
+ ctx: &Self::Fields
+ ) -> Vec<crate::xir::QName> {
#[allow(unused_mut)]
let mut missing = vec![];
$(
- $crate::attr_parse!(@if_missing_req $($fmod)? self.$field {
+ $crate::attr_parse!(@if_missing_req $($fmod)? ctx.$field {
missing.push($qname);
});
)*
@@ -273,24 +236,17 @@ macro_rules! attr_parse {
}
impl $state_name {
- fn done_with_element(ele: crate::xir::QName, span: OpenSpan) -> Self {
- use crate::xir::parse::attr::AttrParseState;
-
- let mut new = Self::with_element(ele, span);
- new.___done = true;
- new
+ fn done_with_element(
+ ele: crate::xir::QName,
+ span: crate::xir::OpenSpan,
+ ) -> Self {
+ Self::Done(ele, span)
}
}
$(#[$sattr])*
- #[doc=""]
- #[doc=concat!(
- "This is produced by the parser [`",
- stringify!($state_name),
- "`]."
- )]
#[derive(Debug, PartialEq)]
- $vis struct $struct_name {
+ $($vis)? struct $struct_name {
$(
$(#[$fattr])*
pub $field: $ty,
@@ -302,22 +258,33 @@ macro_rules! attr_parse {
impl std::fmt::Display for $state_name {
/// Additional error context shown in diagnostic messages for
/// certain variants of [`ParseError`].
+ ///
+ /// [`ParseError`]: crate::parse::ParseError
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
- // TODO
- write!(f, "parsing attributes")
+ use crate::fmt::{DisplayWrapper, TtQuote};
+ use crate::xir::parse::AttrParseState;
+
+ write!(
+ f,
+ "expecting attributes for element {}",
+ TtQuote::wrap(self.element_name())
+ )
}
}
impl crate::parse::ParseState for $state_name {
- type Token = crate::xir::flat::XirfToken;
+ type Token = crate::xir::flat::XirfToken<
+ crate::xir::flat::RefinedText
+ >;
type Object = $struct_name;
type Error = crate::xir::parse::AttrParseError<Self>;
+ type Context = [<$state_name Context>];
fn parse_token(
#[allow(unused_mut)]
mut self,
tok: Self::Token,
- _: crate::parse::NoContext,
+ ctx: &mut Self::Context,
) -> crate::parse::TransitionResult<Self> {
use crate::parse::{Transition, Transitionable, ParseStatus};
use crate::xir::{
@@ -325,54 +292,122 @@ macro_rules! attr_parse {
parse::{AttrParseError, AttrParseState}
};
#[allow(unused_imports)]
- use crate::xir::attr::Attr; // unused if no attrs
+ use crate::xir::attr::{Attr, AttrSpan}; // unused if no attrs
+
+ let ele_name = self.element_name();
- match tok {
+ match (self, tok) {
$(
// Use guard so we don't bind as a variable if we
// forget to import a const for `$qname`.
// We don't use `$qname:pat` because we reuse
// `$qname` for error messages.
- flat::XirfToken::Attr(attr @ Attr(qn, ..)) if qn == $qname => {
- // TODO: Error on prev value
- self.$field.replace(attr.into());
- Transition(self).incomplete()
+ (st @ Self::Parsing(_, _), flat::XirfToken::Attr(
+ attr @ Attr(qn, _, AttrSpan(kspan, _))
+ )) if qn == $qname => {
+ match ctx.$field {
+ // Duplicate attribute name
+ Some((_, first_kspan)) => {
+ Transition(st).err(
+ AttrParseError::DuplicateAttr(
+ attr,
+ first_kspan,
+ ele_name,
+ )
+ )
+ }
+
+ // First time seeing attribute name
+ None => {
+ let result = $crate::attr_parse!(
+ @into_value $($fmod)? attr
+ );
+
+ match result {
+ Ok(value) => {
+ ctx.$field.replace((
+ value,
+ kspan,
+ ));
+
+ Transition(st).incomplete()
+ },
+
+ Err(e) => Transition(st).err(
+ // Will complain about
+ // `Into::into` if Infallible.
+ #[allow(unreachable_code)]
+ AttrParseError::InvalidValue(
+ e.into(),
+ ele_name,
+ )
+ ),
+ }
+ }
+ }
}
)*
- flat::XirfToken::Attr(attr) => {
- let ele_name = self.element_name();
-
- Transition(self).err(AttrParseError::UnexpectedAttr(
+ (st @ Self::Parsing(_, _), flat::XirfToken::Attr(attr)) => {
+ Transition(st).err(AttrParseError::UnexpectedAttr(
attr,
ele_name,
))
},
- // Any tokens received after aggregation is completed
- // must not be processed,
- // otherwise we'll recurse indefinitely.
- tok_dead if self.___done => {
- Transition(self).dead(tok_dead)
- },
-
// Aggregation complete (dead state).
- tok_dead => {
- let (ele, span) = self.___ctx;
-
- self.finalize_attr()
+ (Self::Parsing(ele, span), tok_dead) => {
+ Self::Parsing(ele, span).finalize_attr(ctx)
.map(ParseStatus::Object)
.transition(Self::done_with_element(ele, span))
.with_lookahead(tok_dead)
}
+
+ // Any tokens received after aggregation is completed
+ // must not be processed,
+ // otherwise we'll recurse indefinitely.
+ (st @ Self::Done(_, _), tok_dead) => {
+ Transition(st).dead(tok_dead)
+ }
}
}
- fn is_accepting(&self) -> bool {
+ fn is_accepting(&self, _: &Self::Context) -> bool {
// We must always be consumed via the dead state.
false
}
}
+ } };
+
+ // Optional attribute if input above is of the form `(QN_FOO?) => ...`.
+ (@ty_assert ? $ty:ty) => {
+ // This type assertion isn't supported by `assert_impl_all!`.
+ // The error isn't the most clear,
+ // but it's better than nothing and we can improve upon it later
+ // on.
+ const _: fn() = || {
+ trait OptionFromAttr {}
+ impl<T: TryFrom<Attr>> OptionFromAttr for Option<T> {}
+
+ // Fail when `$ty` is not Option<impl TryFrom<Attr>>.
+ fn assert_attr_option<T: OptionFromAttr>() {}
+ assert_attr_option::<$ty>();
+ };
+ };
+
+ (@ty_assert $ty:ty) => {
+ assert_impl_all!($ty: TryFrom<crate::xir::attr::Attr>);
+ };
+
+ (@evty $evty:ty) => {
+ $evty
+ };
+
+ // If no ValueError type is provided,
+ // then it's not possible for values to fail parsing
+ // (their SymbolId is their final value).
+ (@evty) => {
+ std::convert::Infallible
};
// Optional attribute if input above is of the form `(QN_FOO?) => ...`.
@@ -389,11 +424,23 @@ macro_rules! attr_parse {
};
// Optional attribute if input above is of the form `(QN_FOO?) => ...`.
+ (@into_value ? $from:ident) => {
+ $from.try_into().map(Some)
+ };
+
+ (@into_value $from:ident) => {
+ $from.try_into()
+ };
+
+ // Optional attribute if input above is of the form `(QN_FOO?) => ...`.
(@maybe_value ? $from:ident.$field:ident) => {
// This does not produce a great error if the user forgets to use an
// `Option` type for optional attributes,
// but the comment is better than nothing.
- $from.$field.unwrap_or(None) // field type must be Option<T>
+ match $from.$field { // field type must be Option<T>
+ Some((value, _kspan)) => value,
+ None => None,
+ }
};
// Otherwise,
@@ -403,425 +450,11 @@ macro_rules! attr_parse {
// This assumes that we've already validated via
// `@validate_req` above,
// and so should never actually panic.
- $from.$field.unwrap()
+ match $from.$field.unwrap() {
+ (value, _kspan) => value
+ }
};
}
#[cfg(test)]
-mod test {
- use super::*;
- use crate::{
- parse::{ParseError, ParseState, Parsed, Parser, TokenStream},
- span::{Span, DUMMY_SPAN},
- xir::{
- attr::{Attr, AttrSpan},
- flat::{test::close_empty, Depth, XirfToken},
- st::qname::*,
- },
- };
- use std::assert_matches::assert_matches;
-
- const S1: Span = DUMMY_SPAN;
- const S2: Span = S1.offset_add(1).unwrap();
- const S3: Span = S2.offset_add(1).unwrap();
- const SE: OpenSpan = OpenSpan(S1.offset_add(100).unwrap(), 0);
-
- // Random choice of QName for tests.
- const QN_ELE: QName = QN_YIELDS;
-
- fn parse_aggregate<S: AttrParseState>(
- toks: impl TokenStream<S::Token>,
- ) -> Result<(S::Object, S::Token), ParseError<S::Token, S::Error>>
- where
- S: AttrParseState,
- S::Context: Default,
- {
- parse_aggregate_with(&mut Parser::with_state(
- S::with_element(QN_ELE, SE),
- toks,
- ))
- }
-
- fn parse_aggregate_with<S: AttrParseState, I>(
- sut: &mut Parser<S, I>,
- ) -> Result<(S::Object, S::Token), ParseError<S::Token, S::Error>>
- where
- S: ParseState,
- S::Context: Default,
- I: TokenStream<S::Token>,
- {
- let mut obj = None;
-
- for item in sut {
- match item {
- Ok(Parsed::Object(result)) => {
- obj.replace(result);
- }
- Ok(Parsed::Incomplete) => continue,
- // This represents the dead state,
- // since this is the top-level parser.
- Err(ParseError::UnexpectedToken(tok, _)) => {
- return Ok((
- obj.expect(
- "parser did not produce aggregate attribute object",
- ),
- tok,
- ))
- }
- Err(other) => return Err(other),
- }
- }
-
- panic!("expected AttrParseState dead state (obj: {obj:?})");
- }
-
- #[test]
- fn required_with_values() {
- attr_parse! {
- struct ReqValuesState -> ReqValues {
- name: (QN_NAME) => Attr,
- yields: (QN_YIELDS) => Attr,
- }
- }
-
- let attr_name = Attr(QN_NAME, "val_name".into(), AttrSpan(S1, S2));
- let attr_yields = Attr(QN_YIELDS, "val_value".into(), AttrSpan(S2, S3));
- let tok_dead = close_empty(S3, Depth(0));
-
- let toks = vec![
- XirfToken::Attr(attr_name.clone()),
- XirfToken::Attr(attr_yields.clone()),
- // Will cause dead state:
- tok_dead.clone(),
- ]
- .into_iter();
-
- assert_eq!(
- Ok((
- ReqValues {
- name: attr_name,
- yields: attr_yields,
- },
- tok_dead
- )),
- parse_aggregate::<ReqValuesState>(toks),
- );
- }
-
- // Same as above test,
- // but the order of the tokens is swapped.
- #[test]
- fn required_with_values_out_of_order() {
- attr_parse! {
- struct ReqValuesState -> ReqValues {
- name: (QN_NAME) => Attr,
- yields: (QN_YIELDS) => Attr,
- }
- }
-
- let attr_name = Attr(QN_NAME, "val_name".into(), AttrSpan(S1, S2));
- let attr_yields = Attr(QN_YIELDS, "val_value".into(), AttrSpan(S2, S3));
- let tok_dead = close_empty(S3, Depth(0));
-
- // @yields then @name just to emphasize that order does not matter.
- let toks = vec![
- XirfToken::Attr(attr_yields.clone()),
- XirfToken::Attr(attr_name.clone()),
- // Will cause dead state:
- tok_dead.clone(),
- ]
- .into_iter();
-
- assert_eq!(
- Ok((
- ReqValues {
- name: attr_name,
- yields: attr_yields,
- },
- tok_dead
- )),
- parse_aggregate::<ReqValuesState>(toks),
- );
- }
-
- #[test]
- fn optional_with_values() {
- attr_parse! {
- struct OptValuesState -> OptValues {
- name: (QN_NAME?) => Option<Attr>,
- yields: (QN_YIELDS?) => Option<Attr>,
- }
- }
-
- let attr_name = Attr(QN_NAME, "val_name".into(), AttrSpan(S1, S2));
- let attr_yields = Attr(QN_YIELDS, "val_value".into(), AttrSpan(S2, S3));
- let tok_dead = close_empty(S3, Depth(0));
-
- let toks = vec![
- XirfToken::Attr(attr_name.clone()),
- XirfToken::Attr(attr_yields.clone()),
- // Will cause dead state:
- tok_dead.clone(),
- ]
- .into_iter();
-
- assert_eq!(
- Ok((
- OptValues {
- name: Some(attr_name),
- yields: Some(attr_yields),
- },
- tok_dead
- )),
- parse_aggregate::<OptValuesState>(toks),
- );
- }
-
- #[test]
- fn optional_with_all_missing() {
- attr_parse! {
- struct OptMissingState -> OptMissing {
- name: (QN_NAME?) => Option<Attr>,
- yields: (QN_YIELDS?) => Option<Attr>,
- }
- }
-
- let tok_dead = close_empty(S3, Depth(0));
-
- let toks = vec![
- // Will cause dead state:
- tok_dead.clone(),
- ]
- .into_iter();
-
- assert_eq!(
- Ok((
- OptMissing {
- name: None,
- yields: None,
- },
- tok_dead
- )),
- parse_aggregate::<OptMissingState>(toks),
- );
- }
-
- #[test]
- fn mixed_some_optional_missing() {
- attr_parse! {
- struct MixedState -> Mixed {
- name: (QN_NAME) => Attr,
- src: (QN_SRC?) => Option<Attr>,
- yields: (QN_YIELDS?) => Option<Attr>,
- }
- }
-
- let attr_name = Attr(QN_NAME, "val_name".into(), AttrSpan(S1, S2));
- let attr_src = Attr(QN_SRC, "val_src".into(), AttrSpan(S2, S3));
- let tok_dead = close_empty(S3, Depth(0));
-
- let toks = vec![
- // `name` and `src` but no optional `yields`.
- XirfToken::Attr(attr_name.clone()),
- XirfToken::Attr(attr_src.clone()),
- // Will cause dead state:
- tok_dead.clone(),
- ]
- .into_iter();
-
- assert_eq!(
- Ok((
- Mixed {
- name: attr_name,
- src: Some(attr_src),
- yields: None,
- },
- tok_dead
- )),
- parse_aggregate::<MixedState>(toks),
- );
- }
-
- mod required {
- use super::*;
- use crate::sym::st;
-
- attr_parse! {
- struct ReqMissingState -> ReqMissing {
- name: (QN_NAME) => Attr,
- src: (QN_SRC) => Attr,
- ty: (QN_TYPE) => Attr,
- yields: (QN_YIELDS) => Attr,
- }
- }
-
- const ATTR_NAME: Attr =
- Attr(QN_NAME, st::raw::L_NAME, AttrSpan(S1, S2));
- const ATTR_YIELDS: Attr =
- Attr(QN_YIELDS, st::raw::L_VALUE, AttrSpan(S2, S3));
-
- #[test]
- fn required_missing_values() {
- let tok_dead = close_empty(S3, Depth(0));
-
- let toks = vec![
- XirfToken::Attr(ATTR_NAME),
- // <Missing @src, but no error yet.>
- // <Missing @type, but no error yet.>
- XirfToken::Attr(ATTR_YIELDS),
- // Will cause dead state,
- // which will then trigger the error:
- tok_dead.clone(),
- ]
- .into_iter();
-
- let err = parse_aggregate::<ReqMissingState>(toks)
- .expect_err("expected failure from missing attributes");
-
- // The error should provide the state of the parser during the
- // finalization step.
- // Since this happens in a dead state,
- // we must also receive the token that triggered it,
- // just as we would normally receive on successful parsing.
- assert_matches!(
- err,
- ParseError::StateError(AttrParseError::MissingRequired(
- ReqMissingState {
- name: Some(ref given_name),
- src: None, // cause of the error
- ty: None, // another cause of the error
- yields: Some(ref given_yields),
- ..
- },
- )) if given_name == &ATTR_NAME
- && given_yields == &ATTR_YIELDS
- );
- }
-
- /// Relies on [`required_missing_values`] above to verify state of the
- /// parser used in the error.
- #[test]
- fn error_contains_all_required_missing_attr_names() {
- // Manually construct the partial state rather than parsing tokens.
- // `required_missing_values` above verifies that this state is what
- // is in fact constructed from a failed parsing attempt.
- let mut partial = ReqMissingState::with_element(QN_ELE, SE);
- partial.name.replace(ATTR_NAME);
- partial.yields.replace(ATTR_YIELDS);
-
- let err = AttrParseError::MissingRequired(partial);
-
- // When represented as a string,
- // the error should produce _all_ required attributes that do not
- // have values,
- // rather than requiring the user to fix one and re-compile only
- // to encounter another,
- // and potentially repeat multiple times.
- let err_str = err.to_string();
- assert!(
- err_str.contains(&format!("@{QN_SRC}")),
- "\"{err_str}\" must contain \"@{QN_SRC}\""
- );
- assert!(
- err_str.contains(&format!("@{QN_TYPE}")),
- "\"{err_str}\" must contain \"@{QN_TYPE}\""
- );
-
- // The error should also reference the element name
- // (which is provided in `parse_aggregate`).
- assert!(
- err_str.contains(&QN_ELE.to_string()),
- "\"{err_str}\" must contain name of element being parsed"
- );
- }
-
- /// See also [`error_contains_all_required_missing_attr_names`].
- #[test]
- fn diagnostic_message_contains_all_required_missing_attr_name() {
- let mut partial = ReqMissingState::with_element(QN_ELE, SE);
- partial.name.replace(ATTR_NAME);
- partial.yields.replace(ATTR_YIELDS);
-
- let err = AttrParseError::MissingRequired(partial);
- let desc = err.describe();
-
- // The diagnostic message should reference the element.
- assert_eq!(desc[0].span(), SE.span());
-
- // It should re-state the required attributes,
- // since this is where the user will most likely be looking.
- let label_str = desc[0]
- .label()
- .expect("missing diagnostic label")
- .to_string();
-
- assert!(
- label_str.contains(&format!("@{QN_SRC}")),
- "diagnostic label \"{label_str}\" must contain \"@{QN_SRC}\""
- );
- assert!(
- label_str.contains(&format!("@{QN_TYPE}")),
- "diagnostic label \"{label_str}\" must contain \"@{QN_TYPE}\""
- );
- }
- }
-
- #[test]
- fn unexpected_attr_with_recovery() {
- attr_parse! {
- struct UnexpectedState -> Unexpected {
- name: (QN_NAME) => Attr,
- src: (QN_SRC) => Attr,
- }
- }
-
- let attr_name = Attr(QN_NAME, "val_name".into(), AttrSpan(S1, S2));
- let attr_unexpected =
- Attr(QN_TYPE, "unexpected".into(), AttrSpan(S1, S2));
- let attr_src = Attr(QN_SRC, "val_src".into(), AttrSpan(S2, S3));
- let tok_dead = close_empty(S3, Depth(0));
-
- let toks = vec![
- // This is expected:
- XirfToken::Attr(attr_name.clone()),
- // NOT expected (produce an error):
- XirfToken::Attr(attr_unexpected.clone()),
- // <Recovery must take place here.>
- // This is expected after recovery:
- XirfToken::Attr(attr_src.clone()),
- // Will cause dead state:
- tok_dead.clone(),
- ]
- .into_iter();
-
- let mut sut =
- Parser::with_state(UnexpectedState::with_element(QN_ELE, SE), toks);
-
- // This will fail at the unknown attribute,
- // and must then remain in a state where parsing can be resumed.
- // This simply means ignoring the provided attribute,
- // which in XIRF is discarding a single token of input,
- // rather than having to continue parsing the attribute to then
- // discard.
- assert_eq!(
- Err(ParseError::StateError(AttrParseError::UnexpectedAttr(
- attr_unexpected,
- QN_ELE,
- ))),
- parse_aggregate_with(&mut sut),
- );
-
- // The final result,
- // after having failed and recovered.
- assert_eq!(
- Ok((
- Unexpected {
- name: attr_name,
- src: attr_src,
- },
- tok_dead
- )),
- parse_aggregate_with(&mut sut),
- );
- }
-}
+mod test;
diff --git a/tamer/src/xir/parse/attr/test.rs b/tamer/src/xir/parse/attr/test.rs
new file mode 100644
index 0000000..ca40eda
--- /dev/null
+++ b/tamer/src/xir/parse/attr/test.rs
@@ -0,0 +1,632 @@
+// XIR attribute parser generator tests
+//
+// Copyright (C) 2014-2022 Ryan Specialty Group, LLC.
+//
+// This file is part of TAME.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+use super::*;
+use crate::{
+ diagnose::AnnotatedSpan,
+ parse::{ParseError, ParseState, Parsed, Parser, TokenStream},
+ span::dummy::*,
+ sym::SymbolId,
+ xir::{
+ attr::{Attr, AttrSpan},
+ flat::{test::close_empty, Depth, XirfToken},
+ st::qname::*,
+ },
+};
+use std::{assert_matches::assert_matches, error::Error, fmt::Display};
+
+const SE: OpenSpan = OpenSpan(S1.offset_add(100).unwrap(), 0);
+
+// Random choice of QName for tests.
+const QN_ELE: QName = QN_YIELDS;
+
+fn parse_aggregate<S: AttrParseState>(
+ toks: impl TokenStream<S::Token>,
+) -> Result<(S::Object, S::Token), ParseError<S::Token, S::Error>>
+where
+ S: AttrParseState,
+ S::Context: Default,
+{
+ parse_aggregate_with(&mut Parser::with_state(
+ S::with_element(QN_ELE, SE),
+ toks,
+ ))
+}
+
+fn parse_aggregate_with<S: AttrParseState, I>(
+ sut: &mut Parser<S, I>,
+) -> Result<(S::Object, S::Token), ParseError<S::Token, S::Error>>
+where
+ S: ParseState,
+ S::Context: Default,
+ I: TokenStream<S::Token>,
+{
+ let mut obj = None;
+
+ for item in sut {
+ match item {
+ Ok(Parsed::Object(result)) => {
+ obj.replace(result);
+ }
+ Ok(Parsed::Incomplete) => continue,
+ // This represents the dead state,
+ // since this is the top-level parser.
+ Err(ParseError::UnexpectedToken(tok, _)) => {
+ return Ok((
+ obj.expect(
+ "parser did not produce aggregate attribute object",
+ ),
+ tok,
+ ))
+ }
+ Err(other) => return Err(other),
+ }
+ }
+
+ panic!("expected AttrParseState dead state (obj: {obj:?})");
+}
+
+#[test]
+fn required_with_values() {
+ attr_parse! {
+ struct ReqValuesState -> ReqValues {
+ name: (QN_NAME) => Attr,
+ yields: (QN_YIELDS) => Attr,
+ }
+ }
+
+ let attr_name = Attr(QN_NAME, "val_name".into(), AttrSpan(S1, S2));
+ let attr_yields = Attr(QN_YIELDS, "val_value".into(), AttrSpan(S2, S3));
+ let tok_dead = close_empty(S3, Depth(0));
+
+ let toks = vec![
+ XirfToken::Attr(attr_name.clone()),
+ XirfToken::Attr(attr_yields.clone()),
+ // Will cause dead state:
+ tok_dead.clone(),
+ ]
+ .into_iter();
+
+ assert_eq!(
+ Ok((
+ ReqValues {
+ name: attr_name,
+ yields: attr_yields,
+ },
+ tok_dead
+ )),
+ parse_aggregate::<ReqValuesState>(toks),
+ );
+}
+
+// Same as above test,
+// but the order of the tokens is swapped.
+#[test]
+fn required_with_values_out_of_order() {
+ attr_parse! {
+ struct ReqValuesState -> ReqValues {
+ name: (QN_NAME) => Attr,
+ yields: (QN_YIELDS) => Attr,
+ }
+ }
+
+ let attr_name = Attr(QN_NAME, "val_name".into(), AttrSpan(S1, S2));
+ let attr_yields = Attr(QN_YIELDS, "val_value".into(), AttrSpan(S2, S3));
+ let tok_dead = close_empty(S3, Depth(0));
+
+ // @yields then @name just to emphasize that order does not matter.
+ let toks = vec![
+ XirfToken::Attr(attr_yields.clone()),
+ XirfToken::Attr(attr_name.clone()),
+ // Will cause dead state:
+ tok_dead.clone(),
+ ]
+ .into_iter();
+
+ assert_eq!(
+ Ok((
+ ReqValues {
+ name: attr_name,
+ yields: attr_yields,
+ },
+ tok_dead
+ )),
+ parse_aggregate::<ReqValuesState>(toks),
+ );
+}
+
+#[test]
+fn optional_with_values() {
+ attr_parse! {
+ struct OptValuesState -> OptValues {
+ name: (QN_NAME?) => Option<Attr>,
+ yields: (QN_YIELDS?) => Option<Attr>,
+ }
+ }
+
+ let attr_name = Attr(QN_NAME, "val_name".into(), AttrSpan(S1, S2));
+ let attr_yields = Attr(QN_YIELDS, "val_value".into(), AttrSpan(S2, S3));
+ let tok_dead = close_empty(S3, Depth(0));
+
+ let toks = vec![
+ XirfToken::Attr(attr_name.clone()),
+ XirfToken::Attr(attr_yields.clone()),
+ // Will cause dead state:
+ tok_dead.clone(),
+ ]
+ .into_iter();
+
+ assert_eq!(
+ Ok((
+ OptValues {
+ name: Some(attr_name),
+ yields: Some(attr_yields),
+ },
+ tok_dead
+ )),
+ parse_aggregate::<OptValuesState>(toks),
+ );
+}
+
+#[test]
+fn attr_value_into() {
+ #[derive(Debug, PartialEq, Eq)]
+ struct Foo(SymbolId);
+
+ impl From<Attr> for Foo {
+ fn from(attr: Attr) -> Self {
+ Foo(attr.value())
+ }
+ }
+
+ attr_parse! {
+ // Note that associated type `ValueError` defaults to `Infallible`,
+ // which is why `From` is sufficient above.
+ struct ValueIntoState -> ValueInto {
+ name: (QN_NAME) => Foo,
+ yields: (QN_YIELDS?) => Option<Foo>,
+ }
+ }
+
+ let val_name = "val_name".into();
+ let val_yields = "val_yields".into();
+ let attr_name = Attr(QN_NAME, val_name, AttrSpan(S1, S2));
+ let attr_yields = Attr(QN_YIELDS, val_yields, AttrSpan(S2, S3));
+ let tok_dead = close_empty(S3, Depth(0));
+
+ let toks = vec![
+ XirfToken::Attr(attr_name.clone()),
+ XirfToken::Attr(attr_yields.clone()),
+ // Will cause dead state:
+ tok_dead.clone(),
+ ]
+ .into_iter();
+
+ assert_eq!(
+ Ok((
+ ValueInto {
+ name: Foo(val_name),
+ yields: Some(Foo(val_yields)),
+ },
+ tok_dead
+ )),
+ parse_aggregate::<ValueIntoState>(toks),
+ );
+}
+
+// Note: were the `type ValueError = FooError;` declaration below omitted,
+//   this test would fail at compile time,
+//     since the default `ValueError` of `Infallible` could not represent
+//     `Foo`'s fallible `TryFrom` conversion.
+#[test]
+fn attr_value_error() {
+ #[derive(Debug, PartialEq, Eq)]
+ struct Foo;
+
+ impl TryFrom<Attr> for Foo {
+ type Error = FooError;
+
+ fn try_from(attr: Attr) -> Result<Self, Self::Error> {
+ Err(FooError(attr.value()))
+ }
+ }
+
+ #[derive(Debug, PartialEq)]
+ struct FooError(SymbolId);
+
+ impl Error for FooError {
+ fn source(&self) -> Option<&(dyn Error + 'static)> {
+ None
+ }
+ }
+
+ impl Display for FooError {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+ write!(f, "test FooError")
+ }
+ }
+
+ impl Diagnostic for FooError {
+ fn describe(&self) -> Vec<AnnotatedSpan> {
+ vec![]
+ }
+ }
+
+ attr_parse! {
+ type ValueError = FooError;
+
+ struct ValueIntoState -> ValueInto {
+ name: (QN_NAME) => Foo,
+ yields: (QN_YIELDS?) => Option<Foo>,
+ }
+ }
+
+ let val_name = "val_name".into();
+ let val_yields = "val_yields".into();
+ let attr_name = Attr(QN_NAME, val_name, AttrSpan(S1, S2));
+ let attr_yields = Attr(QN_YIELDS, val_yields, AttrSpan(S2, S3));
+
+ let toks = vec![
+ XirfToken::Attr(attr_name.clone()),
+ XirfToken::Attr(attr_yields.clone()),
+ ];
+
+ let mut sut = Parser::with_state(
+ ValueIntoState::with_element(QN_ELE, SE),
+ toks.into_iter(),
+ );
+
+ assert_eq!(
+ sut.next(),
+ Some(Err(ParseError::StateError(AttrParseError::InvalidValue(
+ FooError(val_name),
+ QN_ELE
+ ))))
+ );
+
+ // TryInto on `Option` inner type.
+ assert_eq!(
+ sut.next(),
+ Some(Err(ParseError::StateError(AttrParseError::InvalidValue(
+ FooError(val_yields),
+ QN_ELE
+ ))))
+ );
+}
+
+#[test]
+fn optional_with_all_missing() {
+ attr_parse! {
+ struct OptMissingState -> OptMissing {
+ name: (QN_NAME?) => Option<Attr>,
+ yields: (QN_YIELDS?) => Option<Attr>,
+ }
+ }
+
+ let tok_dead = close_empty(S3, Depth(0));
+
+ let toks = vec![
+ // Will cause dead state:
+ tok_dead.clone(),
+ ]
+ .into_iter();
+
+ assert_eq!(
+ Ok((
+ OptMissing {
+ name: None,
+ yields: None,
+ },
+ tok_dead
+ )),
+ parse_aggregate::<OptMissingState>(toks),
+ );
+}
+
+#[test]
+fn mixed_some_optional_missing() {
+ attr_parse! {
+ struct MixedState -> Mixed {
+ name: (QN_NAME) => Attr,
+ src: (QN_SRC?) => Option<Attr>,
+ yields: (QN_YIELDS?) => Option<Attr>,
+ }
+ }
+
+ let attr_name = Attr(QN_NAME, "val_name".into(), AttrSpan(S1, S2));
+ let attr_src = Attr(QN_SRC, "val_src".into(), AttrSpan(S2, S3));
+ let tok_dead = close_empty(S3, Depth(0));
+
+ let toks = vec![
+ // `name` and `src` but no optional `yields`.
+ XirfToken::Attr(attr_name.clone()),
+ XirfToken::Attr(attr_src.clone()),
+ // Will cause dead state:
+ tok_dead.clone(),
+ ]
+ .into_iter();
+
+ assert_eq!(
+ Ok((
+ Mixed {
+ name: attr_name,
+ src: Some(attr_src),
+ yields: None,
+ },
+ tok_dead
+ )),
+ parse_aggregate::<MixedState>(toks),
+ );
+}
+
+mod required {
+ use super::*;
+ use crate::{sym::st, xir::EleSpan};
+
+ attr_parse! {
+ struct ReqMissingState -> ReqMissing {
+ name: (QN_NAME) => Attr,
+ src: (QN_SRC) => Attr,
+ ty: (QN_TYPE) => Attr,
+ yields: (QN_YIELDS) => Attr,
+ }
+ }
+
+ const ATTR_NAME: Attr = Attr(QN_NAME, st::raw::L_NAME, AttrSpan(S1, S2));
+ const ATTR_YIELDS: Attr =
+ Attr(QN_YIELDS, st::raw::L_VALUE, AttrSpan(S2, S3));
+
+ #[test]
+ fn required_missing_values() {
+ let tok_dead = close_empty(S3, Depth(0));
+
+ let toks = vec![
+ XirfToken::Attr(ATTR_NAME),
+ // <Missing @src, but no error yet.>
+ // <Missing @type, but no error yet.>
+ XirfToken::Attr(ATTR_YIELDS),
+ // Will cause dead state,
+ // which will then trigger the error:
+ tok_dead.clone(),
+ ]
+ .into_iter();
+
+ let err = parse_aggregate::<ReqMissingState>(toks)
+ .expect_err("expected failure from missing attributes");
+
+ let sut = ReqMissingState::with_element(QN_ELE, SE);
+
+ // The error should provide the state of the parser during the
+ // finalization step.
+ // Since this happens in a dead state,
+ // we must also receive the token that triggered it,
+ // just as we would normally receive on successful parsing.
+ assert_matches!(
+ err,
+ ParseError::StateError(AttrParseError::MissingRequired(
+ given_sut,
+ ReqMissingStateFields {
+ name: Some((ref given_name, _)),
+ src: None, // cause of the error
+ ty: None, // another cause of the error
+ yields: Some((ref given_yields, _)),
+ ..
+ },
+ )) if given_sut == sut
+ && given_name == &ATTR_NAME
+ && given_yields == &ATTR_YIELDS
+ );
+ }
+
+ /// Relies on [`required_missing_values`] above to verify state of the
+ /// parser used in the error.
+ #[test]
+ fn error_contains_all_required_missing_attr_names() {
+ // Manually construct the partial state rather than parsing tokens.
+ // `required_missing_values` above verifies that this state is what
+ // is in fact constructed from a failed parsing attempt.
+ let sut = ReqMissingState::with_element(QN_ELE, SE);
+ let mut partial = ReqMissingStateFields::default();
+ partial.name.replace((ATTR_NAME, S1));
+ partial.yields.replace((ATTR_YIELDS, S2));
+
+ let err = AttrParseError::MissingRequired(sut, partial);
+
+ // When represented as a string,
+ // the error should produce _all_ required attributes that do not
+ // have values,
+ // rather than requiring the user to fix one and re-compile only
+ // to encounter another,
+ // and potentially repeat multiple times.
+ let err_str = err.to_string();
+ assert!(
+ err_str.contains(&format!("@{QN_SRC}")),
+ "\"{err_str}\" must contain \"@{QN_SRC}\""
+ );
+ assert!(
+ err_str.contains(&format!("@{QN_TYPE}")),
+ "\"{err_str}\" must contain \"@{QN_TYPE}\""
+ );
+
+ // The error should also reference the element name
+ // (which is provided in `parse_aggregate`).
+ assert!(
+ err_str.contains(&QN_ELE.to_string()),
+ "\"{err_str}\" must contain name of element being parsed"
+ );
+ }
+
+ /// See also [`error_contains_all_required_missing_attr_names`].
+ #[test]
+ fn diagnostic_message_contains_all_required_missing_attr_name() {
+ let sut = ReqMissingState::with_element(QN_ELE, SE);
+ let mut partial = ReqMissingStateFields::default();
+ partial.name.replace((ATTR_NAME, S1));
+ partial.yields.replace((ATTR_YIELDS, S2));
+
+ let err = AttrParseError::MissingRequired(sut, partial);
+ let desc = err.describe();
+
+ // The diagnostic message should reference the element.
+ assert_eq!(desc[0].span(), SE.span());
+
+ // It should re-state the required attributes,
+ // since this is where the user will most likely be looking.
+ let label_str = desc[0]
+ .label()
+ .expect("missing diagnostic label")
+ .to_string();
+
+ assert!(
+ label_str.contains(&format!("@{QN_SRC}")),
+ "diagnostic label \"{label_str}\" must contain \"@{QN_SRC}\""
+ );
+ assert!(
+ label_str.contains(&format!("@{QN_TYPE}")),
+ "diagnostic label \"{label_str}\" must contain \"@{QN_TYPE}\""
+ );
+ }
+}
+
+#[test]
+fn unexpected_attr_with_recovery() {
+ attr_parse! {
+ struct UnexpectedState -> Unexpected {
+ name: (QN_NAME) => Attr,
+ src: (QN_SRC) => Attr,
+ }
+ }
+
+ let attr_name = Attr(QN_NAME, "val_name".into(), AttrSpan(S1, S2));
+ let attr_unexpected = Attr(QN_TYPE, "unexpected".into(), AttrSpan(S1, S2));
+ let attr_src = Attr(QN_SRC, "val_src".into(), AttrSpan(S2, S3));
+ let tok_dead = close_empty(S3, Depth(0));
+
+ let toks = vec![
+ // This is expected:
+ XirfToken::Attr(attr_name.clone()),
+ // NOT expected (produce an error):
+ XirfToken::Attr(attr_unexpected.clone()),
+ // <Recovery must take place here.>
+ // This is expected after recovery:
+ XirfToken::Attr(attr_src.clone()),
+ // Will cause dead state:
+ tok_dead.clone(),
+ ]
+ .into_iter();
+
+ let mut sut =
+ Parser::with_state(UnexpectedState::with_element(QN_ELE, SE), toks);
+
+ // This will fail at the unknown attribute,
+ // and must then remain in a state where parsing can be resumed.
+ // This simply means ignoring the provided attribute,
+ // which in XIRF is discarding a single token of input,
+ // rather than having to continue parsing the attribute to then
+ // discard.
+ assert_eq!(
+ Err(ParseError::StateError(AttrParseError::UnexpectedAttr(
+ attr_unexpected,
+ QN_ELE,
+ ))),
+ parse_aggregate_with(&mut sut),
+ );
+
+ // The final result,
+ // after having failed and recovered.
+ assert_eq!(
+ Ok((
+ Unexpected {
+ name: attr_name,
+ src: attr_src,
+ },
+ tok_dead
+ )),
+ parse_aggregate_with(&mut sut),
+ );
+}
+
+// A duplicate attribute will result in an error,
+// and recovery will cause the duplicate to be ignored.
+#[test]
+fn duplicate_attr_with_recovery() {
+ attr_parse! {
+ struct DupState -> Dup {
+ name: (QN_NAME) => Attr,
+ src: (QN_SRC) => Attr,
+ }
+ }
+
+ let attr_keep = Attr(QN_NAME, "keep me".into(), AttrSpan(S1, S2));
+ let attr_dup = Attr(QN_NAME, "duplicate".into(), AttrSpan(S2, S3));
+ let attr_src = Attr(QN_SRC, "val_src".into(), AttrSpan(S3, S1));
+ let tok_dead = close_empty(S3, Depth(0));
+
+ let toks = vec![
+ // Both of these have the same name (@name).
+ XirfToken::Attr(attr_keep.clone()),
+ XirfToken::Attr(attr_dup.clone()),
+ // Another attribute just to show that error recovery permits
+ // further attribute parsing.
+ XirfToken::Attr(attr_src.clone()),
+ // Will cause dead state:
+ tok_dead.clone(),
+ ]
+ .into_iter();
+
+ let mut sut = Parser::with_state(DupState::with_element(QN_ELE, SE), toks);
+
+ // The first token is good,
+ // since we haven't encountered the attribute yet.
+ assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete)));
+
+ // The second one results in an error,
+ // since the name is the same.
+ let err = sut
+ .next()
+ .unwrap()
+ .expect_err("DuplicateAttr error expected");
+
+ assert_eq!(
+ ParseError::StateError(AttrParseError::DuplicateAttr(
+ attr_dup,
+ attr_keep.attr_span().key_span(),
+ QN_ELE,
+ )),
+ err,
+ );
+
+ // The diagnostic description of this error should contain first a
+ // reference to the original attribute,
+ // and then a reference to the duplicate.
+ let desc = err.describe();
+ assert_eq!(desc[0].span(), S1);
+ assert_eq!(desc[1].span(), S2);
+
+ // Once parsing is completed,
+ // we must have kept the first occurrence of the attribute and
+ // discarded the second.
+ assert_eq!(
+ Ok((
+ Dup {
+ name: attr_keep,
+ src: attr_src,
+ },
+ tok_dead
+ )),
+ parse_aggregate_with(&mut sut),
+ );
+}
diff --git a/tamer/src/xir/parse/ele.rs b/tamer/src/xir/parse/ele.rs
index ec48012..6182c04 100644
--- a/tamer/src/xir/parse/ele.rs
+++ b/tamer/src/xir/parse/ele.rs
@@ -18,55 +18,362 @@
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//! Element parser generator for parsing of [XIRF](super::super::flat).
+//!
+//! _TODO:_ This needs significantly more documentation;
+//! this is one of the most confusing and complex components of TAMER.
+
+use super::AttrParseState;
+use crate::{
+ diagnostic_panic,
+ fmt::{DisplayWrapper, TtQuote},
+ parse::{
+ ClosedParseState, Context, ParseState, Transition, TransitionResult,
+ },
+ span::Span,
+ xir::{flat::Depth, CloseSpan, OpenSpan, Prefix, QName},
+};
+use arrayvec::ArrayVec;
+use std::{
+ fmt::{Debug, Display, Formatter},
+ marker::PhantomData,
+};
+
+#[cfg(doc)]
+use crate::{ele_parse, parse::Parser};
+
+/// A parser accepting a single element.
+pub trait EleParseState: ParseState {}
+
+/// [`SuperState`] [`Context`] that gets propagated to each child parser.
+///
+/// This consists of two components:
+///
+/// 1. The [`StateStack`],
+/// used to store child NT [`ParseState`]s when transferring to
+/// another NT; and
+/// 2. An [`AttrFieldSum`] object representing the active attribute field
+/// context.
+#[derive(Debug, Default)]
+pub struct SuperCtx<S: SuperState + Default>(
+ Context<StateStack<S>>,
+ S::AttrFields,
+);
+
+impl<S: SuperState + Default> SuperCtx<S> {
+ /// Retrieve a mutable reference to each component.
+ ///
+ /// This is utilized because method calls are more convenient than
+ /// destructuring with [`Context`]'s required use of `Deref`.
+ pub fn parts(
+ &mut self,
+ ) -> (&mut Context<StateStack<S>>, &mut S::AttrFields) {
+ match self {
+ Self(stack, fields) => (stack, fields),
+ }
+ }
+
+ pub fn stack_ref(&self) -> &Context<StateStack<S>> {
+ match self {
+ Self(stack, _) => stack,
+ }
+ }
+}
+
+/// Maximum level of parser nesting.
+///
+/// Unfortunately,
+/// this limit _does not_ correspond to the level of XML nesting;
+/// parsers composed of Sum NTs,
+/// in particular,
+/// push multiple parsers onto the stack for a single element.
+///
+/// Note that this is assuming that this parser is used only for TAME
+/// sources.
+/// If that's not the case,
+/// this can be made to be configurable like XIRF.
+pub const MAX_DEPTH: usize = 64;
+
+/// Parser stack for trampoline.
+///
+/// This can be used as a call stack for parsers while avoiding creating
+/// otherwise-recursive data structures with composition-based delegation.
+/// However,
+/// it is more similar to CPS,
+/// in that the parser popped off the stack need not be the parser that
+/// initiated the request and merely represents the next step in
+/// a delayed computation.
+/// If such a return context is unneeded,
+/// a [`ParseState`] may implement tail calls by simply not pushing itself
+/// onto the stack before requesting transfer to another [`ParseState`].
+#[derive(Debug, Default)]
+pub struct StateStack<S: SuperState>(ArrayVec<S, MAX_DEPTH>);
+
+pub type SuperStateContext<S> = Context<SuperCtx<S>>;
+
+// Note that public visibility is needed because `ele_parse` expands outside
+// of this module.
+impl<S: SuperState> StateStack<S> {
+ /// Request a transfer to another [`ParseState`],
+ /// expecting that control be returned to `ret` after it has
+ /// completed.
+ ///
+ /// This can be reasoned about like calling a thunk:
+ /// the return [`ParseState`] is put onto the stack,
+ /// the target [`ParseState`] is used for the state transition to
+ /// cause [`Parser`] to perform the call to it,
+ /// and when it is done
+ /// (e.g. a dead state),
+ /// `ret` will be pop'd from the stack and we'll transition back to
+ /// it.
+ /// Note that this method is not responsible for returning;
+ /// see [`Self::ret_or_dead`] to perform a return.
+ ///
+ /// However,
+ /// the calling [`ParseState`] is not responsible for its return,
+ /// unlike a typical function call.
+ /// Instead,
+ /// this _actually_ more closely resembles CPS
+ /// (continuation passing style),
+ /// and so [`ele_parse!`] must be careful to ensure that stack
+ /// operations are properly paired.
+ /// On the upside,
+ /// if something is erroneously `ret`'d,
+ /// the parser is guaranteed to be in a consistent state since the
+ /// entire state has been reified
+ /// (but the input would then be parsed incorrectly).
+ ///
+ /// Note that tail calls can be implemented by transferring control
+ /// without pushing an entry on the stack to return to,
+ /// but that hasn't been formalized \[yet\] and requires extra care.
+ pub fn transfer_with_ret<SA, ST>(
+ &mut self,
+ Transition(ret): Transition<SA>,
+ target: TransitionResult<ST>,
+ ) -> TransitionResult<ST>
+ where
+ SA: ParseState<Super = S::Super>,
+ ST: ParseState,
+ {
+ let Self(stack) = self;
+
+ // TODO: Global configuration to (hopefully) ensure that XIRF will
+ // actually catch this.
+ if stack.is_full() {
+ // TODO: We need some spans here and ideally convert the
+ // parenthetical error message into a diagnostic footnote.
+ // TODO: Or should we have a special error type that tells the
+ // parent `Parser` to panic with context?
+ diagnostic_panic!(
+ vec![],
+ "maximum parsing depth of {} exceeded while attempting \
+ to push return state {} \
+ (try reducing XML nesting as a workaround)",
+ MAX_DEPTH,
+ TtQuote::wrap(ret),
+ );
+ }
+
+ stack.push(ret.into());
+ target
+ }
+
+ /// Attempt to return to a previous [`ParseState`] that transferred
+ /// control away from itself,
+ /// otherwise yield a dead state transition to `deadst`.
+ ///
+ /// Conceptually,
+ /// this is like returning from a function call,
+ /// where the function was invoked using [`Self::transfer_with_ret`].
+ /// However,
+ /// this system is more akin to CPS
+ /// (continuation passing style);
+ /// see [`Self::transfer_with_ret`] for important information.
+ ///
+ /// If there is no state to return to on the stack,
+ /// then it is assumed that we have received more input than expected
+ /// after having completed a full parse.
+ pub fn ret_or_dead(
+ &mut self,
+ lookahead: S::Token,
+ deadst: S,
+ ) -> TransitionResult<S> {
+ let Self(stack) = self;
+
+        // An empty stack here is not itself a bug:
+        //   as documented above,
+        //     it is taken to mean that we have received more input than
+        //     expected after having completed a full parse,
+        //       and so we yield a dead-state transition rather than panic.
+ match stack.pop() {
+ Some(st) => Transition(st).incomplete().with_lookahead(lookahead),
+ None => Transition(deadst).dead(lookahead),
+ }
+ }
+
+ /// Test every [`ParseState`] on the stack against the predicate `f`.
+ pub fn all(&self, f: impl Fn(&S) -> bool) -> bool {
+ let Self(stack) = self;
+ stack[..].iter().all(f)
+ }
+}
+
+/// Match some type of node.
+#[derive(Debug, PartialEq, Eq)]
+pub enum NodeMatcher {
+ /// Static [`QName`] with a simple equality check.
+ QName(QName),
+ /// Any element with a matching [`Prefix`].
+ Prefix(Prefix),
+}
+
+impl NodeMatcher {
+ /// Match against the provided [`QName`].
+ pub fn matches(&self, qname: QName) -> bool {
+ match self {
+ Self::QName(qn_match) if qn_match == &qname => true,
+ Self::Prefix(prefix) if Some(*prefix) == qname.prefix() => true,
+ _ => false,
+ }
+ }
+}
+
+impl From<QName> for NodeMatcher {
+ fn from(qname: QName) -> Self {
+ Self::QName(qname)
+ }
+}
+
+impl From<Prefix> for NodeMatcher {
+ fn from(prefix: Prefix) -> Self {
+ Self::Prefix(prefix)
+ }
+}
+
+impl Display for NodeMatcher {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+ use crate::xir::fmt::XmlPrefixAnyLocal;
+
+ match self {
+ Self::QName(qname) => Display::fmt(qname, f),
+ Self::Prefix(prefix) => XmlPrefixAnyLocal::fmt(prefix, f),
+ }
+ }
+}
#[macro_export]
macro_rules! ele_parse {
- (type Object = $objty:ty; $($rest:tt)*) => {
- ele_parse!(@!nonterm_decl <$objty> $($rest)*)
+ (
+ $(#[$super_attr:meta])*
+ $vis:vis enum $super:ident;
+
+ // Attr has to be first to avoid ambiguity with `$rest`.
+ $(type AttrValueError = $evty:ty;)?
+ type Object = $objty:ty;
+
+ $(
+ [super] {
+ $($super_body:tt)*
+ };
+ )?
+
+ // Combination of square brackets above and the prefix here are
+ // needed for disambiguation.
+ $(#[$nt_first_attr:meta])*
+ $nt_first:ident := $($nt_defs:tt)*
+ ) => {
+ ele_parse! {@!next $vis $super
+ $(type AttrValueError = $evty;)?
+ type Object = $objty;
+ $(#[$nt_first_attr])*
+ $nt_first := $($nt_defs)*
+ }
+
+ ele_parse!(@!super_sum <$objty> $(#[$super_attr])* $vis $super
+ $([super] { $($super_body)* })?
+ $nt_first := $($nt_defs)*
+ );
+ };
+
+ (@!next $vis:vis $super:ident
+ // Attr has to be first to avoid ambiguity with `$rest`.
+ $(type AttrValueError = $evty:ty;)?
+ type Object = $objty:ty;
+
+ $($rest:tt)*
+ ) => {
+ ele_parse!(@!nonterm_decl <$objty, $($evty)?> $vis $super $($rest)*);
};
- (@!nonterm_decl <$objty:ty> $nt:ident := $($rest:tt)*) => {
- ele_parse!(@!nonterm_def <$objty> $nt $($rest)*);
+ (@!nonterm_decl <$objty:ty, $($evty:ty)?>
+ $vis:vis $super:ident $(#[$nt_attr:meta])* $nt:ident := $($rest:tt)*
+ ) => {
+ ele_parse!(@!nonterm_def <$objty, $($evty)?>
+ $vis $super $(#[$nt_attr])* $nt $($rest)*
+ );
};
- (@!nonterm_def <$objty:ty>
- $nt:ident $qname:ident $(($($ntp:tt)*))?
- { $($matches:tt)* } $($rest:tt)*
+ (@!nonterm_def <$objty:ty, $($evty:ty)?>
+ $vis:vis $super:ident $(#[$nt_attr:meta])* $nt:ident $qname:ident $(($($ntp:tt)*))?
+ { $($matches:tt)* }; $($rest:tt)*
) => {
- ele_parse!(@!ele_expand_body <$objty> $nt $qname ($($($ntp)*)?) $($matches)*);
+ ele_parse!(@!ele_expand_body <$objty, $($evty)?>
+ $vis $super $(#[$nt_attr])* $nt $qname ($($($ntp)*)?) $($matches)*
+ );
- ele_parse! {
+ ele_parse! {@!next $vis $super
+ $(type AttrValueError = $evty;)?
type Object = $objty;
$($rest)*
}
};
- (@!nonterm_def <$objty:ty> $nt:ident ($ntreffirst:ident $(| $ntref:ident)+), $($rest:tt)*) => {
- ele_parse!(@!ele_dfn_sum $nt [$ntfirst $($nt)*]);
+ (@!nonterm_def <$objty:ty, $($evty:ty)?>
+ $vis:vis $super:ident $(#[$nt_attr:meta])* $nt:ident
+ ($ntref_first:ident $(| $ntref:ident)+); $($rest:tt)*
+ ) => {
+ ele_parse!(@!ele_dfn_sum <$objty>
+ $vis $super $(#[$nt_attr])* $nt [$ntref_first $($ntref)*]
+ );
- ele_parse! {
+ ele_parse! {@!next $vis $super
+ $(type AttrValueError = $evty;)?
+ type Object = $objty;
$($rest)*
}
};
- (@!nonterm_decl <$objty:ty>) => {};
+ (@!nonterm_decl <$objty:ty, $($evty:ty)?> $vis:vis $super:ident) => {};
// Expand the provided data to a more verbose form that provides the
// context necessary for state transitions.
- (@!ele_expand_body <$objty:ty> $nt:ident $qname:ident ($($ntp:tt)*)
+ (@!ele_expand_body <$objty:ty, $($evty:ty)?>
+ $vis:vis $super:ident
+ $(#[$nt_attr:meta])* $nt:ident $qname:ident ($($ntp:tt)*)
+
@ { $($attrbody:tt)* } => $attrmap:expr,
$(/$(($close_span:ident))? => $closemap:expr,)?
+ // Special forms (`[sp](args) => expr`).
+ $(
+ [$special:ident]$(($($special_arg:ident),*))?
+ => $special_map:expr,
+ )?
+
// Nonterminal references are provided as a list.
+ // A configuration specifier can be provided,
+ // currently intended to support the Kleene star.
$(
$ntref:ident,
)*
- ) => {
+ ) => { paste::paste! {
ele_parse! {
- @!ele_dfn_body <$objty> $nt $qname ($($ntp)*)
+ @!ele_dfn_body <$objty, $($evty)?>
+ $vis $super $(#[$nt_attr])*$nt $qname ($($ntp)*)
+
@ { $($attrbody)* } => $attrmap,
/$($($close_span)?)? => ele_parse!(@!ele_close $($closemap)?),
+ $([$special]$(($($special_arg),*))? => $special_map,)?
+
<> {
$(
$ntref,
@@ -77,12 +384,12 @@ macro_rules! ele_parse {
-> {
@ ->
$(
- ($nt::$ntref),
- ($nt::$ntref) ->
- )* ($nt::ExpectClose_),
+ ([<$nt ChildNt_>]::$ntref, $ntref),
+ ([<$nt ChildNt_>]::$ntref, $ntref) ->
+ )* ([<$nt ChildNt_>]::ExpectClose_, ()),
}
}
- };
+ } };
// No explicit Close mapping defaults to doing nothing at all
// (so yield Incomplete).
@@ -94,7 +401,45 @@ macro_rules! ele_parse {
crate::parse::ParseStatus::Object($close)
};
- (@!ele_dfn_body <$objty:ty> $nt:ident $qname:ident ($($open_span:ident)?)
+ // Delegation when the destination type is `()`,
+ // indicating that the next state is not a child NT
+ // (it is likely the state expecting a closing tag).
+ (@!ntref_delegate
+ $stack:ident, $ret:expr, (), $_target:expr, $done:expr
+ ) => {
+ $done
+ };
+
+ // Delegate to a child parser by pushing self onto the stack and
+ // yielding to one of the child's states.
+ // This uses a trampoline,
+ // which avoids recursive data structures
+ // (due to `ParseState` composition/stitching)
+ // and does not grow the call stack.
+ (@!ntref_delegate
+ $stack:ident, $ret:expr, $ntnext_st:ty, $target:expr, $_done:expr
+ ) => {
+ $stack.transfer_with_ret(
+ Transition($ret),
+ $target,
+ )
+ };
+
+ // Same as above,
+ // but in situations where we will never transition to a done state.
+ (@!ntref_delegate_nodone
+ $stack:ident, $ret:expr, $ntnext_st:ty, $target:expr
+ ) => {
+ $stack.transfer_with_ret(
+ Transition($ret),
+ $target,
+ )
+ };
+
+ (@!ele_dfn_body <$objty:ty, $($evty:ty)?>
+ $vis:vis $super:ident $(#[$nt_attr:meta])* $nt:ident $qname:ident
+ ($($qname_matched:pat, $open_span:pat)?)
+
// Attribute definition special form.
@ {
// We must lightly parse attributes here so that we can retrieve
@@ -102,7 +447,7 @@ macro_rules! ele_parse {
// `$attrmap`.
$(
$(#[$fattr:meta])*
- $field:ident: ($fmatch:tt) => $fty:ty,
+ $field:ident: ($($fmatch:tt)+) => $fty:ty,
)*
} => $attrmap:expr,
@@ -110,6 +455,9 @@ macro_rules! ele_parse {
// (defaulting to Incomplete via @!ele_expand_body).
/$($close_span:ident)? => $closemap:expr,
+ // Streaming (as opposed to aggregate) attribute parsing.
+ $([attr]($attr_stream_binding:ident) => $attr_stream_map:expr,)?
+
// Nonterminal references.
<> {
$(
@@ -118,262 +466,1611 @@ macro_rules! ele_parse {
}
-> {
- @ -> ($ntfirst:path),
+ @ -> ($ntfirst:path, $ntfirst_st:ty),
$(
- ($ntprev:path) -> ($ntnext:path),
+ ($ntprev:path, $ntprev_st:ty) -> ($ntnext:path, $ntnext_st:ty),
)*
}
- ) => {
- // TODO
- paste::paste! {
- crate::attr_parse! {
- struct [<$nt AttrsState_>] -> [<$nt Attrs_>] {
- $(
- $(#[$fattr])*
- $field: ($fmatch) => $fty,
- )*
- }
- }
+ ) => { paste::paste! {
+ crate::attr_parse! {
+ /// Attribute parser for
+ #[doc=concat!("[`", stringify!($nt), "`].")]
+ vis($vis);
+ $(type ValueError = $evty;)?
- #[doc=concat!("Parser for element [`", stringify!($qname), "`].")]
- #[derive(Debug, PartialEq, Eq, Default)]
- enum $nt {
- #[doc=concat!(
- "Expecting opening tag for element [`",
- stringify!($qname),
- "`]."
- )]
- #[default]
- Expecting_,
- /// Recovery state ignoring all remaining tokens for this
- /// element.
- RecoverEleIgnore_(crate::xir::QName, crate::xir::OpenSpan, Depth),
- RecoverEleIgnoreClosed_(crate::xir::QName, crate::xir::CloseSpan),
- /// Parsing element attributes.
- Attrs_([<$nt AttrsState_>]),
+ struct #[doc(hidden)] [<$nt AttrState_>] -> [<$nt Attrs>] {
$(
- $ntref($ntref),
+ $(#[$fattr])*
+ $field: ($($fmatch)+) => $fty,
)*
- ExpectClose_(()),
- /// Closing tag found and parsing of the element is
- /// complete.
- Closed_(crate::span::Span),
}
+ }
- impl std::fmt::Display for $nt {
- fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
- use crate::{
- fmt::{DisplayWrapper, TtQuote},
- xir::fmt::TtOpenXmlEle,
- };
+ #[doc(hidden)]
+ #[derive(Debug, PartialEq, Eq)]
+ $vis enum [<$nt ChildNt_>] {
+ $(
+ $ntref(
+ (
+ crate::xir::QName,
+ crate::xir::OpenSpan,
+ crate::xir::flat::Depth
+ ),
+ ),
+ )*
- match self {
- Self::Expecting_ => write!(
- f,
- "expecting opening tag {}",
- TtOpenXmlEle::wrap($qname),
- ),
- Self::RecoverEleIgnore_(name, ..) => write!(
- f,
- "attempting to recover by ignoring element \
- with unexpected name {given} \
- (expected {expected})",
- given = TtQuote::wrap(name),
- expected = TtQuote::wrap($qname),
- ),
+ ExpectClose_(
+ (
+ crate::xir::QName,
+ crate::xir::OpenSpan,
+ crate::xir::flat::Depth
+ ),
+ ),
+ }
- Self::Attrs_(sa) => todo!("Attrs_ Display: {sa:?}"),
- Self::Closed_(_) => write!(
- f,
- "element {} closed",
- TtQuote::wrap($qname)
- ),
- $(
- Self::$ntref(st) => std::fmt::Display::fmt(st, f),
- )*
- todo => todo!("other Display: {todo:?}"),
- }
- }
+ $(#[$nt_attr])*
+ ///
+ #[doc=concat!("Parser for element [`", stringify!($qname), "`] ")]
+ #[doc=concat!("with attributes [`", stringify!([<$nt Attrs>]), "`].")]
+ #[derive(Debug, PartialEq, Eq, Default)]
+ $vis struct $nt(crate::xir::parse::NtState<$nt>);
+
+ #[doc(hidden)]
+ $vis type [<$nt AttrFields>] =
+ crate::parse::Context<
+ <[<$nt AttrState_>] as crate::xir::parse::AttrParseState>::Fields
+ >;
+
+ impl $nt {
+ /// A default state that cannot be preempted by the superstate.
+ #[allow(dead_code)] // not utilized for every NT
+ fn non_preemptable() -> Self {
+ Self(crate::xir::parse::NtState::NonPreemptableExpecting)
}
- #[derive(Debug, PartialEq)]
- enum [<$nt Error_>] {
- UnexpectedEle_(crate::xir::QName, crate::span::Span),
- Attrs_(crate::xir::parse::AttrParseError<[<$nt AttrsState_>]>),
- $(
- $ntref([<$ntref Error_>]),
- )*
+ /// Whether the given QName would be matched by any of the
+ /// parsers associated with this type.
+ #[inline]
+ fn matches(qname: crate::xir::QName) -> bool {
+ <Self as crate::xir::parse::Nt>::matcher().matches(qname)
}
- impl From<crate::xir::parse::AttrParseError<[<$nt AttrsState_>]>>
- for [<$nt Error_>]
- {
- fn from(
- e: crate::xir::parse::AttrParseError<[<$nt AttrsState_>]>
- ) -> Self {
- [<$nt Error_>]::Attrs_(e)
- }
+ /// Number of
+ /// [`NodeMatcher`](crate::xir::parse::NodeMatcher)s
+ /// considered by this parser.
+ ///
+ /// This is always `1` for this parser.
+ #[allow(dead_code)] // used by Sum NTs
+ const fn matches_n() -> usize {
+ 1
}
- $(
- impl From<[<$ntref Error_>]> for [<$nt Error_>] {
- fn from(e: [<$ntref Error_>]) -> Self {
- [<$nt Error_>]::$ntref(e)
- }
- }
- )*
+ /// Format matcher for display.
+ ///
+ /// This value may be rendered singularly or as part of a list of
+ /// values joined together by Sum NTs.
+ /// This function receives the number of values to be formatted
+ /// as `n` and the current 0-indexed offset within that list
+ /// as `i`.
+ /// This allows for zero-copy rendering of composable NTs.
+ ///
+ /// `i` must be incremented after the operation.
+ #[allow(dead_code)] // used by Sum NTs
+ fn fmt_matches(
+ n: usize,
+ i: &mut usize,
+ f: &mut std::fmt::Formatter
+ ) -> std::fmt::Result {
+ use crate::{
+ fmt::ListDisplayWrapper,
+ xir::{fmt::EleSumList, parse::Nt},
+ };
- impl std::error::Error for [<$nt Error_>] {
- fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
- // TODO
- None
+ let matcher = &<Self as Nt>::matcher();
+ EleSumList::fmt_nth(n, *i, matcher, f)?;
+ *i += 1;
+
+ Ok(())
+ }
+
+ /// Whether the parser is in a state that can tolerate superstate
+ /// node preemption.
+ ///
+ /// For more information,
+ /// see the superstate
+ #[doc=concat!(
+ " [`", stringify!($super), "::can_preempt_node`]."
+ )]
+ fn can_preempt_node(&self) -> bool {
+ match self {
+ Self(st) => st.can_preempt_node(),
}
}
- impl std::fmt::Display for [<$nt Error_>] {
- fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
- use crate::{
- fmt::DisplayWrapper,
- xir::fmt::TtOpenXmlEle,
- };
+ #[allow(dead_code)] // used only when there are child NTs
+ /// Whether the current state represents the last child NT.
+ fn is_last_nt(&self) -> bool {
+ use crate::xir::parse::NtState::*;
- match self {
- Self::UnexpectedEle_(name, _) => {
- write!(f, "unexpected {}", TtOpenXmlEle::wrap(name))
- }
- Self::Attrs_(e) => std::fmt::Display::fmt(e, f),
- $(
- Self::$ntref(e) => std::fmt::Display::fmt(e, f),
- )*
- }
+ let Self(st) = self;
+
+ // This results in `Self::$ntref(..) => true,` for the
+ // _last_ NT,
+ // and `=> false` for all others.
+ // If there are no NTs,
+ // it results in `Self::Attrs(..) => true,`,
+ // which is technically true but will never be called in
+ // that context.
+ match st {
+ Attrs(..) => $(
+ false,
+ Jmp([<$nt ChildNt_>]::$ntref(..)) =>
+ )* true,
+
+ _ => false,
}
}
+ }
+
+ impl crate::xir::parse::Nt for $nt {
+ type AttrState = [<$nt AttrState_>];
+ type ChildNt = [<$nt ChildNt_>];
- impl crate::diagnose::Diagnostic for [<$nt Error_>] {
- fn describe(&self) -> Vec<crate::diagnose::AnnotatedSpan> {
- todo!()
+ #[inline]
+ fn matcher() -> crate::xir::parse::NodeMatcher {
+ crate::xir::parse::NodeMatcher::from($qname)
+ }
+ }
+
+ impl std::fmt::Display for $nt {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+ match self {
+ Self(st) => std::fmt::Display::fmt(st, f),
}
}
+ }
+
+ impl crate::parse::ParseState for $nt {
+ type Token = crate::xir::flat::XirfToken<
+ crate::xir::flat::RefinedText
+ >;
+ type Object = $objty;
+ type Error = crate::xir::parse::NtError<$nt>;
+ type Context = crate::xir::parse::SuperStateContext<Self::Super>;
+ type Super = $super;
- impl crate::parse::ParseState for $nt {
- type Token = crate::xir::flat::XirfToken;
- type Object = $objty;
- type Error = [<$nt Error_>];
-
- fn parse_token(
- self,
- tok: Self::Token,
- _: crate::parse::NoContext,
- ) -> crate::parse::TransitionResult<Self> {
- use crate::{
- parse::{EmptyContext, Transition, Transitionable},
- xir::{
- flat::XirfToken,
- parse::attr::parse_attrs,
+ fn parse_token(
+ self,
+ tok: Self::Token,
+ #[allow(unused_variables)] // used only if child NTs
+ ctx: &mut Self::Context,
+ ) -> crate::parse::TransitionResult<Self::Super> {
+ use crate::{
+ parse::{Transition, Transitionable},
+ xir::{
+ EleSpan,
+ flat::XirfToken,
+ parse::{parse_attrs, NtState},
+ },
+ };
+
+ use NtState::{
+ Attrs, Expecting, NonPreemptableExpecting,
+ RecoverEleIgnore, CloseRecoverIgnore,
+ RecoverEleIgnoreClosed, Closed, Jmp,
+ };
+
+ let Self(selfst) = self;
+ #[allow(unused_variables)] // stack sometimes unused
+ let (stack, attr_fields) = ctx.parts();
+
+ match (selfst, tok) {
+ (
+ Expecting | NonPreemptableExpecting,
+ XirfToken::Open(qname, span, depth)
+ ) if $nt::matches(qname) => {
+ use crate::xir::parse::AttrFieldSum;
+ attr_fields.init_fields::<[<$nt AttrFields>]>();
+
+ let transition = Transition(Self(Attrs(
+ (qname, span, depth),
+ parse_attrs(qname, span)
+ )));
+
+ // Streaming attribute parsing will cause the
+ // attribute map to be yielded immediately as the
+ // opening object,
+ // since we will not be aggregating attrs.
+ $(
+ // Used only to match on `[attr]`.
+ let [<_ $attr_stream_binding>] = ();
+ return transition.ok(<$objty>::from($attrmap));
+ )?
+
+ // If the `[attr]` special form was _not_
+ // provided,
+ // we'll be aggregating attributes.
+ #[allow(unreachable_code)]
+ transition.incomplete()
+ },
+
+ (
+ Closed(..),
+ XirfToken::Open(qname, span, depth)
+ ) if Self::matches(qname) => {
+ use crate::xir::parse::AttrFieldSum;
+ attr_fields.init_fields::<[<$nt AttrFields>]>();
+
+ Transition(Self(Attrs(
+ (qname, span, depth),
+ parse_attrs(qname, span)
+ ))).incomplete()
+ },
+
+ // We only attempt recovery when encountering an
+ // unknown token if we're forced to accept that token.
+ (
+ NonPreemptableExpecting,
+ XirfToken::Open(qname, span, depth)
+ ) => {
+ Transition(Self(
+ RecoverEleIgnore(qname, span, depth)
+ )).err(
+ Self::Error::UnexpectedEle(
+ qname, span.name_span()
+ )
+ )
+ },
+
+ (
+ RecoverEleIgnore(qname, _, depth_open),
+ XirfToken::Close(_, span, depth_close)
+ ) if depth_open == depth_close => {
+ Transition(Self(
+ RecoverEleIgnoreClosed(qname, span)
+ )).incomplete()
+ },
+
+ // Streaming attribute matching takes precedence over
+ // aggregate.
+ // This is primarily me being lazy,
+ // because it's not worth a robust syntax for something
+ // that's rarely used
+ // (macro-wise, I mean;
+ // it's heavily utilized as a percentage of
+ // source file parsed since short-hand template
+ // applications are heavily used).
+ $(
+ (
+ st @ Attrs(..),
+ XirfToken::Attr($attr_stream_binding),
+ ) => {
+ Transition(Self(st))
+ .ok(<$objty>::from($attr_stream_map))
},
- };
- use $nt::{
- Attrs_, Expecting_, RecoverEleIgnore_,
- RecoverEleIgnoreClosed_, ExpectClose_, Closed_
- };
+ // Override the aggregate attribute parser
+ // delegation by forcing the below match to become
+ // unreachable
+ // (xref anchor <<SATTR>>).
+ // Since we have already emitted the `$attrmap`
+ // object on `Open`,
+ // this yields an incomplete parse.
+ (Attrs(meta, _), tok) => {
+ ele_parse!(@!ntref_delegate
+ stack,
+ Self(Jmp($ntfirst(meta))),
+ $ntfirst_st,
+ Transition($ntfirst_st::default())
+ .incomplete()
+ .with_lookahead(tok),
+ Transition(Self(Jmp($ntfirst(meta))))
+ .incomplete()
+ .with_lookahead(tok)
+ )
+ }
+ )?
+
+ // This becomes unreachable when the `[attr]` special
+ // form is provided,
+ // which overrides this match directly above
+ // (xref <<SATTR>>).
+ #[allow(unreachable_patterns)]
+ (Attrs(meta @ (qname, span, depth), sa), tok) => {
+ use crate::xir::parse::AttrFieldSum;
+
+ sa.delegate_until_obj::<Self, _>(
+ tok,
+ attr_fields.narrow::<[<$nt AttrFields>]>(span),
+ |sa| Transition(Self(Attrs(meta, sa))),
+ // If we enter a dead state then we have
+ // failed produce an attribute object,
+ // in which case we'll recover by ignoring
+ // the entire element.
+ || Transition(Self(RecoverEleIgnore(qname, span, depth))),
+ |#[allow(unused_variables)] sa, attrs| {
+ let obj = match attrs {
+ // Attribute field bindings for `$attrmap`
+ [<$nt Attrs>] {
+ $(
+ $field,
+ )*
+ } => {
+ // Optional `OpenSpan` binding
+ let _ = qname; // avoid unused warning
+ $(
+ use crate::xir::parse::attr::AttrParseState;
+ let $qname_matched = qname;
+ let $open_span = sa.element_span();
+ )?
+
+ <$objty>::from($attrmap)
+ },
+ };
+
+ // Lookahead is added by `delegate_until_obj`.
+ ele_parse!(@!ntref_delegate
+ stack,
+ Self(Jmp($ntfirst(meta))),
+ $ntfirst_st,
+ Transition(<$ntfirst_st>::default()).ok(obj),
+ Transition(Self(Jmp($ntfirst(meta)))).ok(obj)
+ )
+ }
+ )
+ },
+
+ $(
+ // We're transitioning from `(ntprev) -> (ntnext)`.
+ // If we have a token that matches `ntprev`,
+ // we can transition _back_ to that state rather
+ // than transitioning forward.
+ // We can _only_ do this when we know we are
+ // transitioning away from this state,
+ // otherwise we could return to a previous state,
+ // which violates the semantics of the implied
+ // DFA.
+ (
+ Jmp($ntprev(meta)),
+ XirfToken::Open(qname, span, depth)
+ ) if $ntprev_st::matches(qname) => {
+ let tok = XirfToken::Open(qname, span, depth);
- match (self, tok) {
- (Expecting_, XirfToken::Open(qname, span, ..)) if qname == $qname => {
- Transition(Attrs_(parse_attrs(qname, span)))
- .incomplete()
+ ele_parse!(@!ntref_delegate
+ stack,
+ Self(Jmp($ntprev(meta))),
+ $ntprev_st,
+ // This NT said it could process this token,
+ // so force it to either do so or error,
+ // to ensure that bugs don't cause infinite
+ // processing of lookahead.
+ Transition(<$ntprev_st>::non_preemptable())
+ .incomplete()
+ .with_lookahead(tok),
+ Transition(Self(Jmp($ntprev(meta))))
+ .incomplete()
+ .with_lookahead(tok)
+ )
},
- (Expecting_, XirfToken::Open(qname, span, depth)) => {
- Transition(RecoverEleIgnore_(qname, span, depth)).err(
- [<$nt Error_>]::UnexpectedEle_(qname, span.name_span())
+ (Jmp($ntprev(meta)), tok) => {
+ ele_parse!(@!ntref_delegate
+ stack,
+ Self(Jmp($ntnext(meta))),
+ $ntnext_st,
+ Transition(<$ntnext_st>::default())
+ .incomplete()
+ .with_lookahead(tok),
+ Transition(Self(Jmp($ntnext(meta))))
+ .incomplete()
+ .with_lookahead(tok)
)
},
+ // Since `ExpectClose_` does not have an `$ntprev`
+ // match,
+ // we have to handle transitioning back to the
+ // previous state as a special case.
+ // Further,
+ // we choose to transition back to this state
+ // _no matter what the element_,
+ // to force error recovery and diagnostics
+ // in that context,
+ // which will tell the user what elements were
+ // expected in the last NT rather than just
+ // telling them a closing tag was expected.
+ //
+ // To avoid a bunch of rework of this macro
+ // (which can hopefully be done in the future),
+ // this match is output for _every_ NT,
+ // but takes effect only for the final NT because
+ // of the `is_last_nt` predicate.
+ // _It is important that it only affect the
+ // final NT_,
+ // otherwise we'll transition back to _any_
+ // previous state at the close,
+ // which completely defeats the purpose of
+ // having ordered states.
(
- RecoverEleIgnore_(qname, _, depth_open),
- XirfToken::Close(_, span, depth_close)
- ) if depth_open == depth_close => {
- Transition(RecoverEleIgnoreClosed_(qname, span)).incomplete()
+ Jmp([<$nt ChildNt_>]::ExpectClose_(meta)),
+ XirfToken::Open(qname, span, depth)
+ ) if Self(Jmp($ntprev(meta))).is_last_nt() => {
+ let tok = XirfToken::Open(qname, span, depth);
+ ele_parse!(@!ntref_delegate_nodone
+ stack,
+ Self(Jmp($ntprev(meta))),
+ $ntprev_st,
+ // If this NT cannot handle this element,
+ // it should error and enter recovery to
+ // ignore it.
+ Transition(<$ntprev_st>::non_preemptable())
+ .incomplete()
+ .with_lookahead(tok)
+ )
},
+ )*
+
+ // XIRF ensures proper nesting,
+ // so we do not need to check the element name.
+ (
+ Jmp([<$nt ChildNt_>]::ExpectClose_((qname, _, depth)))
+ | CloseRecoverIgnore((qname, _, depth), _),
+ XirfToken::Close(_, span, tok_depth)
+ ) if tok_depth == depth => {
+ $(
+ let $close_span = span;
+ )?
+ $closemap.transition(Self(Closed(Some(qname), span.tag_span())))
+ },
+
+ (
+ Jmp([<$nt ChildNt_>]::ExpectClose_(meta @ (qname, otspan, _))),
+ unexpected_tok
+ ) => {
+ use crate::parse::Token;
+ Transition(Self(
+ CloseRecoverIgnore(meta, unexpected_tok.span())
+ )).err(
+ Self::Error::CloseExpected(qname, otspan, unexpected_tok)
+ )
+ }
+
+ // We're still in recovery,
+ // so this token gets thrown out.
+ (st @ (RecoverEleIgnore(..) | CloseRecoverIgnore(..)), _) => {
+ Transition(Self(st)).incomplete()
+ },
+
+ // Note that this does not necessarily represent an
+ // accepting state
+ // (see `is_accepting`).
+ (
+ st @ (
+ Expecting
+ | NonPreemptableExpecting
+ | Closed(..)
+ | RecoverEleIgnoreClosed(..)
+ ),
+ tok
+ ) => {
+ Transition(Self(st)).dead(tok)
+ }
+ }
+ }
+
+ fn is_accepting(&self, _: &Self::Context) -> bool {
+ use crate::xir::parse::NtState::*;
+ matches!(*self, Self(Closed(..) | RecoverEleIgnoreClosed(..)))
+ }
+ }
+ }};
+
+ (@!ele_dfn_sum <$objty:ty> $vis:vis $super:ident
+ $(#[$nt_attr:meta])* $nt:ident [$($ntref:ident)*]
+ ) => {paste::paste! {
+ $(#[$nt_attr])*
+ ///
+ #[doc=concat!(
+ "Parser expecting one of ",
+ $("[`", stringify!($ntref), "`], ",)*
+ "."
+ )]
+ #[derive(Debug, PartialEq, Eq, Default)]
+ $vis struct $nt(crate::xir::parse::SumNtState<$nt>);
+
+ // Must be a _unique_ unit type to avoid conflicting trait impls.
+ #[doc(hidden)]
+ #[derive(Debug, PartialEq, Eq, Default)]
+ $vis struct [<$nt AttrFields>];
+
+ impl $nt {
+ fn non_preemptable() -> Self {
+ Self(crate::xir::parse::SumNtState::NonPreemptableExpecting)
+ }
+
+ // Whether the given QName would be matched by any of the
+ // parsers associated with this type.
+ //
+ // This is short-circuiting and will return as soon as one
+ // parser is found,
+ // so it may be a good idea to order the sum type according
+ // to the most likely value to be encountered.
+ // At its worst,
+ // this may be equivalent to a linear search of the parsers.
+ // With that said,
+ // Rust/LLVM may optimize this in any number of ways,
+ // especially if each inner parser matches on a QName
+ // constant.
+ // Let a profiler and disassembly guide you.
+ #[allow(dead_code)] // used by superstate
+ fn matches(qname: crate::xir::QName) -> bool {
+ // If we used an array or a trait,
+ // then we'd need everything to be a similar type;
+ // this allows for _any_ type provided that it expands
+ // into something that contains a `matches` associated
+ // function of a compatible type.
+ false $(|| $ntref::matches(qname))*
+ }
+
+ // Number of
+ // [`NodeMatcher`](crate::xir::parse::NodeMatcher)s
+ // considered by this parser.
+ //
+ // This is the sum of the number of matches of each
+ // constituent NT.
+ const fn matches_n() -> usize {
+ // Count the number of NTs by adding the number of
+ // matches in each.
+ 0 $(+ $ntref::matches_n())*
+ }
+
+ /// Format constituent NTs for display.
+ ///
+ /// This function receives the number of values to be
+ /// formatted as `n` and the current 0-indexed offset within
+ /// that list as `i`.
+ /// This allows for zero-copy rendering of composable NTs.
+ ///
+ /// See also [`SumNt::fmt_matches_top`] to initialize the
+ /// formatting process with the correct values.
+ ///
+ /// [`SumNt::fmt_matches_top`]: crate::xir::parse::SumNt
+ fn fmt_matches(
+ n: usize,
+ i: &mut usize,
+ f: &mut std::fmt::Formatter
+ ) -> std::fmt::Result {
+ $(
+ $ntref::fmt_matches(n, i, f)?;
+ )*
+
+ Ok(())
+ }
+
+ /// Whether the parser is in a state that can tolerate
+ /// superstate node preemption.
+ ///
+ /// For more information,
+ /// see the superstate
+ #[doc=concat!(
+ " [`", stringify!($super), "::can_preempt_node`]."
+ )]
+ fn can_preempt_node(&self) -> bool {
+ match self {
+ Self(st) => st.can_preempt_node(),
+ }
+ }
+ }
+
+ impl crate::xir::parse::SumNt for $nt {
+ /// Begin formatting using [`Self::fmt_matches`].
+ ///
+ /// This provides the initial values for the function.
+ fn fmt_matches_top(f: &mut std::fmt::Formatter) -> std::fmt::Result {
+ Self::fmt_matches(Self::matches_n().saturating_sub(1), &mut 0, f)
+ }
+ }
+
+ impl std::fmt::Display for $nt {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+ match self {
+ Self(st) => std::fmt::Display::fmt(st, f),
+ }
+ }
+ }
+
+ impl crate::parse::ParseState for $nt {
+ type Token = crate::xir::flat::XirfToken<
+ crate::xir::flat::RefinedText
+ >;
+ type Object = $objty;
+ type Error = crate::xir::parse::SumNtError<$nt>;
+ type Context = crate::xir::parse::SuperStateContext<Self::Super>;
+ type Super = $super;
- (st @ RecoverEleIgnore_(..), _) => {
- Transition(st).incomplete()
+ fn parse_token(
+ self,
+ tok: Self::Token,
+ ctx: &mut Self::Context,
+ ) -> crate::parse::TransitionResult<Self::Super> {
+ use crate::{
+ parse::Transition,
+ xir::{
+ flat::XirfToken,
+ EleSpan,
+ parse::SumNtState::{
+ Expecting,
+ NonPreemptableExpecting,
+ RecoverEleIgnore,
},
+ },
+ };
- (Attrs_(sa), tok) => {
- sa.delegate_until_obj(
- tok,
- EmptyContext,
- |sa| Transition(Attrs_(sa)),
- || unreachable!("see ParseState::delegate_until_obj dead"),
- |#[allow(unused_variables)] sa, attrs| {
- let obj = match attrs {
- // Attribute field bindings for `$attrmap`
- [<$nt Attrs_>] {
- $(
- $field,
- )*
- } => {
- // Optional `OpenSpan` binding
- $(
- use crate::xir::parse::attr::AttrParseState;
- let $open_span = sa.element_span();
- )?
-
- $attrmap
- },
- };
-
- Transition($ntfirst(Default::default())).ok(obj)
- }
+ let (stack, _) = ctx.parts();
+
+ match (self.0, tok) {
+ $(
+ (
+ st @ (Expecting | NonPreemptableExpecting),
+ XirfToken::Open(qname, span, depth)
+ ) if $ntref::matches(qname) => {
+ ele_parse!(@!ntref_delegate_nodone
+ stack,
+ Self(Expecting),
+ $ntref,
+ Transition(
+ // Propagate non-preemption status,
+ // otherwise we'll provide a lookback
+ // of the original token and end up
+ // recursing until we hit the `stack`
+ // limit.
+ match st {
+ NonPreemptableExpecting => {
+ $ntref::non_preemptable()
+ }
+ _ => {
+ $ntref::default()
+ }
+ }
+ ).incomplete().with_lookahead(
+ XirfToken::Open(qname, span, depth)
+ )
)
},
- $(
- ($ntprev(st_inner), tok) => {
- st_inner.delegate(
- tok,
- EmptyContext,
- |si| Transition($ntprev(si)),
- || Transition($ntnext(Default::default()))
+ (
+ NonPreemptableExpecting,
+ XirfToken::Open(qname, span, depth)
+ ) if $ntref::matches(qname) => {
+ ele_parse!(@!ntref_delegate_nodone
+ stack,
+ Self(Expecting),
+ $ntref,
+ Transition(
+ $ntref::non_preemptable()
+ ).incomplete().with_lookahead(
+ XirfToken::Open(qname, span, depth)
)
- },
- )*
-
- // XIRF ensures proper nesting,
- // so this must be our own closing tag.
- (ExpectClose_(_), XirfToken::Close(_, span, _)) => {
- $(
- let $close_span = span;
- )?
- $closemap.transition(Closed_(span.tag_span()))
+ )
},
+ )*
+
+ // If we're non-preemptable,
+ // then we're expected to be able to process this
+ // token or fail trying.
+ (
+ NonPreemptableExpecting,
+ XirfToken::Open(qname, span, depth)
+ ) => {
+ Transition(Self(
+ RecoverEleIgnore(qname, span, depth, Default::default())
+ )).err(
+ // Use name span rather than full `OpenSpan`
+ // since it's specifically the name that was
+ // unexpected,
+ // not the fact that it's an element.
+ Self::Error::UnexpectedEle(
+ qname,
+ span.name_span(),
+ Default::default(),
+ )
+ )
+ },
+
+ // An unexpected token when repeating ends repetition
+ // and should not result in an error.
+ (
+ Expecting | NonPreemptableExpecting,
+ tok
+ ) => Transition(Self(Expecting)).dead(tok),
- // TODO: Use `is_accepting` guard if we do not utilize
- // exhaustiveness check.
- (st @ (Closed_(..) | RecoverEleIgnoreClosed_(..)), tok) =>
- Transition(st).dead(tok),
+ // XIRF ensures that the closing tag matches the opening,
+ // so we need only check depth.
+ (
+ RecoverEleIgnore(_, _, depth_open, _),
+ XirfToken::Close(_, _, depth_close)
+ ) if depth_open == depth_close => {
+ Transition(Self(Expecting)).incomplete()
+ },
- todo => todo!("{todo:?}"),
+ (st @ RecoverEleIgnore(..), _) => {
+ Transition(Self(st)).incomplete()
+ },
+ }
+ }
+
+ fn is_accepting(&self, _: &Self::Context) -> bool {
+ use crate::xir::parse::SumNtState;
+ matches!(self, Self(SumNtState::Expecting))
+ }
+ }
+ }};
+
+ // Generate superstate sum type.
+ //
+ // This is really annoying because we cannot read the output of another
+ // macro,
+ // and so we have to do our best to re-parse the body of the
+ // original `ele_parse!` invocation without duplicating too much
+ // logic,
+ // and we have to do so in a way that we can aggregate all of
+ // those data.
+ (@!super_sum <$objty:ty> $(#[$super_attr:meta])* $vis:vis $super:ident
+ $(
+ [super] {
+ // Non-whitespace text nodes can be mapped into elements
+ // with the given QName as a preprocessing step,
+ // allowing them to reuse the existing element NT system.
+ $([text]($text:ident, $text_span:ident) => $text_map:expr,)?
+
+ // Optional _single_ NT to preempt arbitrary elements.
+ // Sum NTs can be used to preempt multiple elements.
+ $($pre_nt:ident)?
+ }
+ )?
+ $(
+ // NT definition is always followed by `:=`.
+ $(#[$_ident_attr:meta])*
+ $nt:ident :=
+ // Identifier if an element NT.
+ $($_i:ident)?
+ // Parenthesis for a sum NT,
+ // or possibly the span match for an element NT.
+ // So: `:= QN_IDENT(span)` or `:= (A | B | C)`.
+ $( ($($_p:tt)*) )?
+ // Braces for an element NT body.
+ $( {$($_b:tt)*} )?
+ // Element and sum NT both conclude with a semicolon,
+ // which we need to disambiguate the next `$nt`.
+ ;
+ )*
+ ) => { paste::paste! {
+ $(#[$super_attr])*
+ ///
+ /// Superstate representing the union of all related parsers.
+ ///
+ /// This [`ParseState`] allows sub-parsers to independently the
+ /// states associated with their own subgraph,
+ /// and then yield a state transition directly to a state of
+ /// another parser.
+ /// This is conceptually like CPS (continuation passing style),
+ /// where this [`ParseState`] acts as a trampoline.
+ ///
+ /// This [`ParseState`] is required for use with [`Parser`];
+ /// see [`ClosedParseState`] for more information.
+ ///
+ /// [`Parser`]: crate::parse::Parser
+ /// [`ParseState`]: crate::parse::ParseState
+ /// [`ClosedParseState`]: crate::parse::ClosedParseState
+ #[derive(Debug, PartialEq, Eq)]
+ $vis enum $super {
+ $(
+ $nt($nt),
+ )*
+ }
+
+ /// Superstate attribute context sum type.
+ ///
+ /// For more information on why this exists,
+ /// see [`AttrFieldSum`](crate::xir::parse::AttrFieldSum).
+ #[derive(Debug, Default)]
+ $vis enum [<$super AttrFields>] {
+ #[default]
+ /// Indicates that no attribute parsing is active.
+ ///
+ /// Since attribute parsing is initialized at each attribute
+ /// state transition,
+ /// this will never be read.
+ /// Further,
+ /// this may never be utilized beyond the initial construction
+ /// of the superstate's context.
+ Uninitialized,
+
+ $(
+ $nt([<$nt AttrFields>]),
+ )*
+ }
+
+ impl crate::xir::parse::AttrFieldSum for [<$super AttrFields>] {}
+
+ // Each NT has its own attribute parsing
+ // (except for sum types);
+ // we need to expose a way to initialize parsing for each and
+ // then narrow the type to the appropriate `Context` for the
+ // respective NT's attribute parser.
+ $(
+ impl crate::xir::parse::AttrFieldOp<[<$nt AttrFields>]>
+ for [<$super AttrFields>]
+ {
+ fn init_new() -> Self {
+ Self::$nt(Default::default())
+ }
+
+ fn narrow(
+ &mut self,
+ open_span: crate::xir::OpenSpan,
+ ) -> &mut [<$nt AttrFields>]
+ {
+ use crate::xir::EleSpan;
+ use crate::diagnose::Annotate;
+
+ // Maybe Rust will support more robust dependent types
+ // in the future to make this unnecessary;
+ // see trait docs for this method for more information.
+ match self {
+ // This should _always_ be the case unless if the
+ // system properly initializes attribute parsing
+ // when transitioning to the `Attr` state.
+ Self::$nt(fields) => fields,
+
+ // Using `unreachable_unchecked` did not have any
+ // performance benefit at the time of writing.
+ _ => crate::diagnostic_unreachable!(
+ open_span
+ .span()
+ .internal_error(
+ "failed to initialize attribute parsing \
+ for this element"
+ )
+ .into(),
+ "invalid AttrFields",
+ ),
}
}
+ }
+ )*
+
+ // Default parser is the first NT,
+ // and is non-preemptable to force error handling if the root node
+ // is unexpected.
+ // Note that this also prevents preemption at the root,
+ // which is necessary for now anyway since we need to be able
+ // to statically resolve imports without template expansion in
+ // NIR
+ // (otherwise we have a chicken-and-egg problem).
+ impl Default for $super {
+ fn default() -> Self {
+ ele_parse!(@!ntfirst_init $super, $($nt)*)
+ }
+ }
+
+ $(
+ impl From<$nt> for $super {
+ fn from(st: $nt) -> Self {
+ $super::$nt(st)
+ }
+ }
+ )*
- fn is_accepting(&self) -> bool {
- matches!(*self, Self::Closed_(..) | Self::RecoverEleIgnoreClosed_(..))
+ impl std::fmt::Display for $super {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+ match self {
+ $(
+ Self::$nt(e) => std::fmt::Display::fmt(e, f),
+ )*
}
}
}
- };
- (@!ele_dfn_sum $nt:ident [$($ntref:ident)*]) => {
- #[derive(Debug, PartialEq, Eq)]
- enum $nt {
+ /// Superstate error object representing the union of all related
+ /// parsers' errors.
+ #[derive(Debug, PartialEq)]
+ $vis enum [<$super Error_>] {
$(
- $ntref($ntref),
+ $nt(<$nt as crate::parse::ParseState>::Error),
)*
}
- };
+
+ $(
+ impl From<<$nt as crate::parse::ParseState>::Error>
+ for [<$super Error_>]
+ {
+ fn from(e: <$nt as crate::parse::ParseState>::Error) -> Self {
+ [<$super Error_>]::$nt(e)
+ }
+ }
+ )*
+
+ impl std::error::Error for [<$super Error_>] {
+ fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+ // TODO
+ None
+ }
+ }
+
+ impl std::fmt::Display for [<$super Error_>] {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+ match self {
+ $(
+ Self::$nt(e) => std::fmt::Display::fmt(e, f),
+ )*
+ }
+ }
+ }
+
+ impl crate::diagnose::Diagnostic for [<$super Error_>] {
+ fn describe(&self) -> Vec<crate::diagnose::AnnotatedSpan> {
+ match self {
+ $(
+ Self::$nt(e) => e.describe(),
+ )*
+ }
+ }
+ }
+
+ impl crate::parse::ParseState for $super {
+ type Token = crate::xir::flat::XirfToken<
+ crate::xir::flat::RefinedText
+ >;
+ type Object = $objty;
+ type Error = [<$super Error_>];
+ type Context = crate::xir::parse::SuperStateContext<Self>;
+
+ fn parse_token(
+ self,
+ tok: Self::Token,
+ ctx: &mut Self::Context,
+ ) -> crate::parse::TransitionResult<Self> {
+ use crate::{
+ parse::Transition,
+ xir::flat::{XirfToken, RefinedText},
+ };
+
+ // Used only by _some_ expansions.
+ #[allow(unused_imports)]
+ use crate::xir::flat::Text;
+
+ match (self, tok) {
+ // [super] {
+ $(
+ // [text] preemption;
+ // see `Self::can_preempt_node`.
+ $(
+ (
+ st,
+ XirfToken::Text(
+ RefinedText::Unrefined(
+ Text($text, $text_span)
+ ),
+ _,
+ )
+ ) if st.can_preempt_node() => {
+ Transition(st).ok(<$objty>::from($text_map))
+ },
+ )?
+
+ // Preemption NT
+ $(
+ (
+ st,
+ XirfToken::Open(
+ qname,
+ ospan,
+ depth,
+ ),
+ ) if st.can_preempt_node() && $pre_nt::matches(qname) => {
+ let (stack, _) = ctx.parts();
+
+ stack.transfer_with_ret(
+ Transition(st),
+ Transition(
+ // Prevent recursing on this token.
+ $pre_nt::non_preemptable()
+ )
+ .incomplete()
+ .with_lookahead(XirfToken::Open(
+ qname,
+ ospan,
+ depth,
+ )),
+ )
+ },
+ )?
+ )?
+ // }
+
+ // Depth check is unnecessary since _all_ xir::parse
+ // parsers
+ // (at least at the time of writing)
+ // ignore whitespace and comments,
+ // so may as well return early.
+ // TODO: I'm ignoring _all_ text for now to
+ // proceed with development; fix.
+ (
+ st,
+ XirfToken::Text(RefinedText::Whitespace(..), _)
+ | XirfToken::Comment(..)
+ ) => {
+ Transition(st).incomplete()
+ }
+
+ $(
+ // Pass token directly to child until it reports
+ // a dead state,
+ // after which we return to the `ParseState`
+ // atop of the stack.
+ (Self::$nt(st), tok) => st.delegate_child(
+ tok,
+ ctx,
+ |deadst, tok, ctx| {
+ let (stack, _) = ctx.parts();
+ stack.ret_or_dead(tok, deadst)
+ },
+ ),
+ )*
+ }
+ }
+
+ fn is_accepting(&self, ctx: &Self::Context) -> bool {
+ // This is short-circuiting,
+ // starting at the _bottom_ of the stack and moving
+ // upward.
+ // The idea is that,
+            //   if we're still in the middle of parsing,
+ // then it's almost certain that the [`ParseState`] on the
+ // bottom of the stack will not be in an accepting
+ // state,
+ // and so we can stop checking early.
+ // In most cases,
+ // if we haven't hit EOF early,
+ // the stack should be either empty or consist of only the
+ // root state.
+ //
+ // After having considered the stack,
+ // we can then consider the active `ParseState`.
+ ctx.stack_ref().all(|st| st.is_inner_accepting(ctx))
+ && self.is_inner_accepting(ctx)
+ }
+ }
+
+ impl $super {
+ /// Whether the inner (active child) [`ParseState`] is in an
+ /// accepting state.
+ ///
+ /// [`ParseState`]: crate::parse::ParseState
+ fn is_inner_accepting(
+ &self,
+ ctx: &<Self as crate::parse::ParseState>::Context
+ ) -> bool {
+ use crate::parse::ParseState;
+
+ match self {
+ $(
+ Self::$nt(st) => st.is_accepting(ctx),
+ )*
+ }
+ }
+
+ /// Whether the inner parser is in a state that can tolerate
+ /// superstate node preemption.
+ ///
+ /// Node preemption allows us (the superstate) to ask for
+ /// permission from the inner parser to parse some token
+ /// ourselves,
+ /// by asking whether the parser is in a state that would
+ /// cause semantic issues if we were to do so.
+ ///
+ /// For example,
+ /// if we were to preempt text nodes while an inner parser was
+ /// still parsing attributes,
+ /// then we would emit an object associated with that text
+ /// before the inner parser had a chance to conclude that
+ /// attribute parsing has completed and emit the opening
+ /// object for that node;
+ /// the result would otherwise be an incorrect
+ /// `Text, Open` instead of the correct `Open, Text`,
+ /// which would effectively unparent the text.
+ /// Similarly,
+ /// if we were to parse our own tokens while an inner parser
+ /// was performing error recovery in such a way as to ignore
+ /// all child tokens,
+ /// then we would emit an object in an incorrect context.
+ #[allow(dead_code)] // TODO: Remove when using for tpl apply
+ fn can_preempt_node(&self) -> bool {
+ match self {
+ $(
+ Self::$nt(st) => st.can_preempt_node(),
+ )*
+ }
+ }
+ }
+
+ impl crate::xir::parse::SuperState for $super {
+ type AttrFields = [<$super AttrFields>];
+ }
+ }};
+
+ (@!ntfirst_init $super:ident, $ntfirst:ident $($nt:ident)*) => {
+ $super::$ntfirst($ntfirst::non_preemptable())
+ }
+}
+
+/// Superstate.
+///
+/// A superstate is responsible for aggregating all nonterminals and serving
+/// as a trampoline to delegate parsing operations.
+///
+/// Conceptually,
+/// a superstate acts as a runtime for the state machine defined by NT
+/// interdependencies.
+/// It represents the reification of such a state machine and all of its
+/// transitions.
+pub trait SuperState: ClosedParseState {
+ /// Sum type holding a variant for every [`Nt`]'s attribute parsing
+ /// context.
+ ///
+ /// This holds the fields for each element as they are being
+ /// aggregated,
+ /// before a final attribute object is produced.
+ type AttrFields: Debug + Default;
+}
+
+/// Attribute context operations for individual NTs.
+///
+/// This is implemented for each NT's attribute parsing context by
+/// [`ele_parse!`] during superstate generation.
+///
+/// See [`AttrFieldSum`] for further explanation.
+pub trait AttrFieldOp<T>: AttrFieldSum + Sized {
+ /// Initialize a new attribute parsing context for the given NT's
+ /// attribute parsing context (represented by `T`).
+ ///
+ /// This must be invoked before attribute parsing begins for an element,
+ /// otherwise there will be a type mismatch during [`Self::narrow`]
+ /// that will result in a panic.
+ fn init_new() -> Self;
+
+ /// Narrow the [`AttrFieldSum`] into the attribute context `T`,
+    ///   panicking if narrowing fails.
+ ///
+ /// The provided [`OpenSpan`] is utilized only for a diagnostic panic if
+ /// lowering fails,
+ /// and should never be utilized in a correctly implemented system.
+ ///
+ /// Panics
+ /// ======
+ /// This will issue a diagnostic panic if the requested type `T` was not
+ /// the last type initialized using [`Self::init_new`].
+ /// The idea is that,
+ /// if [`ele_parse`] is properly implemented,
+ /// non-matching branches should be unreachable,
+ /// and so this panic should never occur.
+ fn narrow(&mut self, open_span: OpenSpan) -> &mut T;
+}
+
+/// Sum type representing the attribute parsing contexts for each [`Nt`]'s
+/// attribute parser.
+///
+/// This may also contain unique unit types for [`SumNt`]s,
+/// which serve no purpose beyond simplifying construction of this sum
+/// type.
+///
+/// Why does this exist?
+/// ====================
+/// Prior to this implementation,
+/// each individual NT's attribute parsers ([`AttrParseState`]s)
+/// had embedded within them their parsing context.
+/// Since [`ParseState`] is immutable,
+/// it relies on Rust's ability to properly optimize away `memcpy`s so
+/// that the construction of a new [`ParseState`] amounts to in-place
+/// mutation of the existing one.
+///
+/// Unfortunately,
+/// some NTs have quite a few attributes,
+/// leading so some [`AttrParseState`]s that were nearing 2KiB in size.
+/// Since the [`AttrParseState`] is a component of NTs' [`ParseState`]s,
+/// their width had to grow to accommodate;
+/// and since [`SuperState`] aggregates all NTs,
+/// the width of the superstate had to accommodate the width of the
+/// largest NT parser.
+///
+/// This snowballing thwarted Rust's optimizations in many cases,
+/// which had a significant impact on performance and undermined the
+/// design of TAME's parsing system.
+/// Further,
+/// it resulted in a situation whereby the introduction of new attributes
+/// or NIR symbol variants would cut `tamec`'s performance in half;
+/// clearly things were only going to get worse.
+///
+/// Most data structures within TAME are used as IRs,
+/// pursuant to TAME's goal of reifying all parser state.
+///   Because of the streaming lowering pipeline,
+/// IRs are typically ephemeral,
+/// and so Rust generally optimizes them away in their entirety.
+///   But the needs of [`NIR`](crate::nir),
+/// for which the [`ele_parse!`] parser-generator was written,
+/// are slightly different—the
+/// NT states are stored on [`StateStack`],
+/// and so their representation cannot be completely optimized away.
+/// For this reason,
+/// the width of these data structures is of greater practical concern.
+///
+/// Separating and Hoisting Intermediate Attribute State
+/// ----------------------------------------------------
+/// The entire reason that [`Context`] exists in TAME's parsing framework
+/// is to be utilized when we're unable to coerce Rust into performing the
+/// necessary optimizations on immutable data structures.
+/// The solution was therefore to extract the field state of the attribute
+/// parser
+/// (representing the ongoing aggregation of attributes,
+/// akin to the Builder pattern in OOP circles)
+/// into a [`Context`],
+/// which removed it from the [`AttrParseState`],
+/// and therefore brought the [`SuperState`] down to a manageable size
+/// (512 bits at the time of writing).
+///
+/// Unfortunately,
+/// this creates a new obvious problem:
+/// how are we to feed the new context to each individual
+/// [`AttrParseState`] if we're keeping that context out of each NT's
+/// individual [`ParseState`]?
+/// By recognizing that only one attribute parser is active at any time,
+/// we would ideally have all such states aggregated into a single memory
+/// location that is only as wide as the largest attribute parsing context.
+/// This is what a sum type (via an `enum`) would give us,
+/// with a small one-byte cost for the discriminant of ~110 variants.
+///
+/// When the attribute context was part of [`AttrParseState`] and therefore
+/// part of each NT's [`ParseState`],
+/// the benefit was that the type of the context is statically known and
+/// could therefore be passed directly to the [`AttrParseState`] without
+/// any further consideration.
+/// But when we decouple that attribute context and hoist it out of all NTs
+/// into a single shared memory location,
+/// then the type becomes dynamic based on the active NT's parser.
+/// The type becomes this sum type ([`AttrFieldSum`]),
+/// which represents all possible types that could serve as such a
+/// context.
+///
+/// Context Narrowing
+/// -----------------
+/// [`AttrFieldSum`] enables polymorphism with respect to the attribute
+/// context,
+/// but the problem is that we have a _contravariant_ relationship—the
+/// context that we pass to the attribute parser must be an element of
+/// the [`AttrFieldSum`] but only one of them is valid.
+/// We must narrow from [`AttrFieldSum`] into the correct type;
+/// this is the job of [`AttrFieldOp`] via [`Self::narrow`].
+///
+/// The idea is this:
+///
+/// 1. We know that only one attribute parser is active at any time,
+/// because we cannot transition to other NTs while performing
+/// attribute parsing.
+/// This invariant is upheld by [`NtState::can_preempt_node`].
+/// 2. During the transition into the [`NtState::Attrs`] state,
+/// [`Self::init_fields`] must be used to prepare the context that
+/// will be required to parse attributes for the element represented
+/// by that respective NT.
+/// This means that this sum type will always assume the variant
+/// representing the appropriate context.
+/// 3. When delegating to the appropriate [`AttrParseState`],
+/// [`Self::narrow`] is used to invoke [`AttrFieldOp::narrow`] for
+/// the appropriate attribute context.
+/// Because of #2 above,
+/// this sum type must already have assumed that respective variant,
+/// and matching on that variant will always yield the requested
+/// attribute context type.
+///
+/// Just to be safe,
+/// in case we have some bug in this implementation,
+/// #3's call to [`Self::narrow`] ought to issue a panic;
+/// this provides a proper balance between safety
+/// (if the type is wrong,
+/// there are no memory safety issues)
+/// and ergonomics
+/// (the API is unchanged)
+/// for what should be unreachable code.
+/// Profiling showed no performance improvement at the time of writing when
+/// attempting to utilize [`std::hint::unreachable_unchecked`].
+///
+/// Before and After
+/// ----------------
+/// This implementation imposes an additional cognitive burden on grokking
+/// this system,
+/// which is why it was initially passed up;
+/// it was only reconsidered when it was necessitated by performance
+/// characteristics and verified through profiling and analysis of the
+/// target disassembly.
+/// The documentation you are reading now is an attempt to offset the
+/// cognitive burden.
+///
+/// Ultimately,
+/// the amount of code required to implement this change was far less than
+/// the amount of text it takes to describe it here.
+/// And while that's a terrible metric to judge an implementation by,
+/// it is intended to convey that if someone does need to understand this
+/// subsystem,
+/// its bounds are quite limited.
+///
+/// The introduction of this system eliminated 90% of the `memcpy` calls
+/// present in `tamec` at the time of writing,
+/// completely removing most of them from the hot code path
+///   (the lowering pipeline);
+/// the major exception is the necessary [`StateStack`],
+/// which exists on a _less hot_ code path,
+/// utilized only during transitions between NTs.
+/// This also clears the brush on paths leading to future optimizations.
+pub trait AttrFieldSum {
+ /// Prepare attribute parsing using the attribute field context `F`.
+ ///
+ /// This must be invoked at the beginning of each transition to
+ /// [`NtState::Attrs`],
+ /// otherwise later narrowing with [`Self::narrow`] will panic.
+ ///
+ /// See [`Self`] and [`AttrFieldOp::init_new`] for more information.
+ fn init_fields<F>(&mut self)
+ where
+ Self: AttrFieldOp<F>,
+ {
+ *self = AttrFieldOp::<F>::init_new();
+ }
+
+ /// Narrow self into the attribute context `T`,
+    ///   panicking if narrowing fails.
+ ///
+ /// See [`Self`] and [`AttrFieldOp::narrow`] for more information.
+ fn narrow<F>(&mut self, open_span: OpenSpan) -> &mut F
+ where
+ Self: AttrFieldOp<F>,
+ {
+ AttrFieldOp::<F>::narrow(self, open_span)
+ }
+}
+
+/// Nonterminal.
+///
+/// This trait is used internally by the [`ele_parse!`] parser-generator.
+pub trait Nt: Debug {
+ /// Attribute parser for this element.
+ type AttrState: AttrParseState;
+ /// [`NtState::Jmp`] states for child NTs.
+ type ChildNt: Debug + PartialEq + Eq;
+
+ /// Matcher describing the node recognized by this parser.
+ fn matcher() -> NodeMatcher;
+}
+
+/// States for nonterminals (NTs).
+#[derive(Debug, PartialEq, Eq)]
+pub enum NtState<NT: Nt> {
+ /// Expecting opening tag for element.
+ Expecting,
+
+ /// Non-preemptable [`Self::Expecting`].
+ NonPreemptableExpecting,
+
+ /// Recovery state ignoring all remaining tokens for this
+ /// element.
+ RecoverEleIgnore(QName, OpenSpan, Depth),
+
+ // Recovery completed because end tag corresponding to the
+ // invalid element has been found.
+ RecoverEleIgnoreClosed(QName, CloseSpan),
+
+ /// Recovery state ignoring all tokens when a `Close` is
+ /// expected.
+ ///
+ /// This is token-agnostic---it
+ /// may be a child element,
+ /// but it may be text,
+ /// for example.
+ CloseRecoverIgnore((QName, OpenSpan, Depth), Span),
+
+ /// Parsing element attributes.
+ Attrs((QName, OpenSpan, Depth), NT::AttrState),
+
+ /// Preparing to pass control (jump) to a child NT's parser.
+ Jmp(NT::ChildNt),
+
+ /// Closing tag found and parsing of the element is
+ /// complete.
+ Closed(Option<QName>, Span),
+}
+
+impl<NT: Nt> Default for NtState<NT> {
+ fn default() -> Self {
+ Self::Expecting
+ }
+}
+
+impl<NT: Nt> NtState<NT> {
+ pub fn can_preempt_node(&self) -> bool {
+ use NtState::*;
+
+ match self {
+ // Preemption before the opening tag is safe,
+ // since we haven't started processing yet.
+ Expecting => true,
+
+ // The name says it all.
+ // Instantiated by the superstate.
+ NonPreemptableExpecting => false,
+
+ // Preemption during recovery would cause tokens to be parsed
+ // when they ought to be ignored,
+ // so we must process all tokens during recovery.
+ RecoverEleIgnore(..) | CloseRecoverIgnore(..) => false,
+
+ // It is _not_ safe to preempt attribute parsing since attribute
+ // parsers aggregate until a non-attribute token is
+ // encountered;
+ // we must allow attribute parsing to finish its job
+ // _before_ any preempted nodes are emitted since the
+ // attributes came _before_ that node.
+ Attrs(..) => false,
+
+ // These states represent jump states where we're about to
+ // transition to the next child parser.
+ // It's safe to preempt here,
+ // since we're not in the middle of parsing.
+ //
+ // Note that this includes `ExpectClose_` because of the macro
+ // preprocessing,
+ // and Rust's exhaustiveness check will ensure that it is
+ // accounted for if that changes.
+ // If we're expecting that the next token is a `Close`,
+ // then it must be safe to preempt other nodes that may
+ // appear in this context as children.
+ Jmp(..) => true,
+
+ // If we're done,
+ // we want to be able to yield a dead state so that we can
+ // transition away from this parser.
+ RecoverEleIgnoreClosed(..) | Closed(..) => false,
+ }
+ }
+}
+
+impl<NT: Nt> Display for NtState<NT> {
+ fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
+ use crate::xir::fmt::{TtCloseXmlEle, TtOpenXmlEle};
+ use NtState::*;
+
+ match self {
+ Expecting | NonPreemptableExpecting => write!(
+ f,
+ "expecting opening tag {}",
+ TtOpenXmlEle::wrap(NT::matcher()),
+ ),
+ RecoverEleIgnore(name, _, _) | RecoverEleIgnoreClosed(name, _) => {
+ write!(
+ f,
+ "attempting to recover by ignoring element \
+ with unexpected name {given} \
+ (expected {expected})",
+ given = TtQuote::wrap(name),
+ expected = TtQuote::wrap(NT::matcher()),
+ )
+ }
+ CloseRecoverIgnore((qname, _, depth), _) => write!(
+ f,
+ "attempting to recover by ignoring input \
+ until the expected end tag {expected} \
+ at depth {depth}",
+ expected = TtCloseXmlEle::wrap(qname),
+ ),
+
+ Attrs(_, sa) => Display::fmt(sa, f),
+ Closed(Some(qname), _) => {
+ write!(f, "done parsing element {}", TtQuote::wrap(qname),)
+ }
+ // Should only happen on an unexpected `Close`.
+ Closed(None, _) => write!(
+ f,
+ "skipped parsing element {}",
+ TtQuote::wrap(NT::matcher()),
+ ),
+ // TODO: A better description.
+ Jmp(_) => {
+ write!(
+ f,
+ "preparing to transition to \
+ parser for next child element(s)"
+ )
+ }
+ }
+ }
+}
+
+/// Sum nonterminal.
+///
+/// This trait is used internally by the [`ele_parse!`] parser-generator.
+pub trait SumNt: Debug {
+ fn fmt_matches_top(f: &mut std::fmt::Formatter) -> std::fmt::Result;
+}
+
+/// States for sum nonterminals.
+///
+/// Sum NTs act like a sum type,
+/// transitioning to the appropriate inner NT based on the next token of
+/// input.
+/// Sum NTs have order-based precedence when faced with ambiguity,
+/// like a PEG.
+///
+/// This is expected to be wrapped by a newtype for each Sum NT,
+/// and does not implement [`ParseState`] itself.
+#[derive(Debug, PartialEq, Eq, Default)]
+pub enum SumNtState<NT: SumNt> {
+ /// Expecting an opening tag for an element.
+ #[default]
+ Expecting,
+
+ /// Non-preemptable [`Self::Expecting`].
+ NonPreemptableExpecting,
+
+ /// Recovery state ignoring all remaining tokens for this
+ /// element.
+ RecoverEleIgnore(QName, OpenSpan, Depth, PhantomData<NT>),
+}
+
+impl<NT: SumNt> SumNtState<NT> {
+ /// Whether the parser is in a state that can tolerate
+ /// superstate node preemption.
+ pub fn can_preempt_node(&self) -> bool {
+ use SumNtState::*;
+
+ match self {
+ // Preemption before the opening tag is safe,
+ // since we haven't started processing yet.
+ Expecting => true,
+
+ // The name says it all.
+ // Instantiated by the superstate.
+ NonPreemptableExpecting => false,
+
+ // Preemption during recovery would cause tokens to
+ // be parsed when they ought to be ignored,
+ // so we must process all tokens during recovery.
+ RecoverEleIgnore(..) => false,
+ }
+ }
+}
+
+impl<NT: SumNt> Display for SumNtState<NT> {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+ use SumNtState::*;
+
+ match self {
+ Expecting | NonPreemptableExpecting => {
+ write!(f, "expecting ")?;
+ NT::fmt_matches_top(f)
+ }
+
+ RecoverEleIgnore(name, _, _, _) => {
+ write!(
+ f,
+ "attempting to recover by ignoring element \
+ with unexpected name {given} \
+ (expected",
+ given = TtQuote::wrap(name),
+ )?;
+
+ NT::fmt_matches_top(f)?;
+ f.write_str(")")
+ }
+ }
+ }
}
#[cfg(test)]
diff --git a/tamer/src/xir/parse/ele/test.rs b/tamer/src/xir/parse/ele/test.rs
index fbbe658..dbac7e5 100644
--- a/tamer/src/xir/parse/ele/test.rs
+++ b/tamer/src/xir/parse/ele/test.rs
@@ -24,27 +24,32 @@
//! and so testing of that parsing is not duplicated here.
//! A brief visual inspection of the implementation of `ele_parse`
//! should suffice to verify this claim.
+//!
+//! [`Parser`] is configured to output a parse trace to stderr for tests,
+//! which is visible when a test fails;
+//! this aids in debugging and study.
+//! To force it to output on a successful test to observe the behavior of
+//! the system,
+//! simply force the test to panic at the end.
+
+use std::{assert_matches::assert_matches, error::Error, fmt::Display};
use crate::{
convert::ExpectInto,
- parse::{Object, ParseError, ParseState, Parsed},
- span::{Span, DUMMY_SPAN},
+ diagnose::Diagnostic,
+ parse::{
+ FinalizeError, Object, ParseError, ParseState, Parsed, ParsedResult,
+ },
+ span::{dummy::*, Span},
sym::SymbolId,
xir::{
attr::{Attr, AttrSpan},
- flat::{Depth, XirfToken},
- st::qname::*,
- CloseSpan, EleNameLen, EleSpan, OpenSpan,
+ flat::{Depth, RefinedText, Text, Whitespace, XirfToken},
+ st::{prefix::*, qname::*},
+ CloseSpan, EleNameLen, EleSpan, OpenSpan, QName,
},
};
-const S1: Span = DUMMY_SPAN;
-const S2: Span = S1.offset_add(1).unwrap();
-const S3: Span = S2.offset_add(1).unwrap();
-const S4: Span = S3.offset_add(1).unwrap();
-const S5: Span = S4.offset_add(1).unwrap();
-const S6: Span = S5.offset_add(1).unwrap();
-
// Some number (value does not matter).
const N: EleNameLen = 10;
@@ -55,11 +60,12 @@ fn empty_element_no_attrs_no_close() {
impl Object for Foo {}
ele_parse! {
+ enum Sut;
type Object = Foo;
- Sut := QN_PACKAGE {
+ Root := QN_PACKAGE {
@ {} => Foo,
- }
+ };
}
let toks = vec![
@@ -70,9 +76,9 @@ fn empty_element_no_attrs_no_close() {
assert_eq!(
Ok(vec![
- Parsed::Incomplete, // [Sut] Open
- Parsed::Object(Foo), // [Sut@] Close (>LA)
- Parsed::Incomplete, // [Sut] Close (<LA)
+ Parsed::Incomplete, // [Root] Open
+ Parsed::Object(Foo), // [Root@] Close (>LA)
+ Parsed::Incomplete, // [Root] Close (<LA)
]),
Sut::parse(toks.into_iter()).collect(),
);
@@ -91,12 +97,13 @@ fn empty_element_no_attrs_with_close() {
impl Object for Foo {}
ele_parse! {
+ enum Sut;
type Object = Foo;
- Sut := QN_PACKAGE {
+ Root := QN_PACKAGE {
@ {} => Foo::Attr,
/ => Foo::Close,
- }
+ };
}
let toks = vec![
@@ -107,9 +114,9 @@ fn empty_element_no_attrs_with_close() {
assert_eq!(
Ok(vec![
- Parsed::Incomplete, // [Sut] Open
- Parsed::Object(Foo::Attr), // [Sut@] Close (>LA)
- Parsed::Object(Foo::Close), // [Sut] Close (<LA)
+ Parsed::Incomplete, // [Root] Open
+ Parsed::Object(Foo::Attr), // [Root@] Close (>LA)
+ Parsed::Object(Foo::Close), // [Root] Close (<LA)
]),
Sut::parse(toks.into_iter()).collect(),
);
@@ -128,12 +135,13 @@ fn empty_element_no_attrs_with_close_with_spans() {
impl crate::parse::Object for Foo {}
ele_parse! {
+ enum Sut;
type Object = Foo;
- Sut := QN_PACKAGE(ospan) {
+ Root := QN_PACKAGE(_, ospan) {
@ {} => Foo::Attr(ospan),
/(cspan) => Foo::Close(cspan),
- }
+ };
}
let toks = vec![
@@ -145,36 +153,201 @@ fn empty_element_no_attrs_with_close_with_spans() {
use Parsed::*;
assert_eq!(
Ok(vec![
- Incomplete, // [Sut] Open
- Object(Foo::Attr(OpenSpan(S1, N))), // [Sut@] Close (>LA)
- Object(Foo::Close(CloseSpan::empty(S2))), // [Sut] Close (<LA)
+ Incomplete, // [Root] Open
+ Object(Foo::Attr(OpenSpan(S1, N))), // [Root@] Close (>LA)
+ Object(Foo::Close(CloseSpan::empty(S2))), // [Root] Close (<LA)
+ ]),
+ Sut::parse(toks.into_iter()).collect(),
+ );
+}
+
+// Match on a namespace prefix rather than a static QName.
+#[test]
+fn empty_element_ns_prefix() {
+ #[derive(Debug, PartialEq, Eq)]
+ struct Foo(QName);
+ impl Object for Foo {}
+
+ ele_parse! {
+ enum Sut;
+ type Object = Foo;
+
+ // This matches `c:*`.
+ Root := NS_C(qname, _) {
+ @ {} => Foo(qname),
+ };
+ }
+
+ let toks = vec![
+ // Just some `c:*`.
+ XirfToken::Open(QN_C_EQ, OpenSpan(S1, N), Depth(0)),
+ XirfToken::Close(None, CloseSpan::empty(S2), Depth(0)),
+ ];
+
+ assert_eq!(
+ Ok(vec![
+ Parsed::Incomplete, // [Root] Open
+ Parsed::Object(Foo(QN_C_EQ)), // [Root@] Close (>LA)
+ Parsed::Incomplete, // [Root] Close (<LA)
]),
Sut::parse(toks.into_iter()).collect(),
);
}
#[test]
+fn empty_element_ns_prefix_nomatch() {
+ #[derive(Debug, PartialEq, Eq)]
+ struct Foo;
+ impl Object for Foo {}
+
+ ele_parse! {
+ enum Sut;
+ type Object = Foo;
+
+ // This matches `c:*`.
+ Root := NS_C {
+ @ {} => Foo,
+ };
+ }
+
+ let span = OpenSpan(S1, N);
+ // Non `c:*` element.
+ let unexpected = QN_PACKAGE;
+
+ let toks = vec![
+ XirfToken::Open(unexpected, span, Depth(0)),
+ XirfToken::Close(None, CloseSpan::empty(S2), Depth(0)),
+ ];
+
+ let mut sut = Sut::parse(toks.into_iter());
+
+ let err = sut.next().unwrap().unwrap_err();
+ assert_eq!(
+ ParseError::StateError(<Sut as ParseState>::Error::Root(
+ <Root as ParseState>::Error::UnexpectedEle(
+ unexpected,
+ span.name_span()
+ )
+ )),
+ err,
+ );
+}
+
+// When a QName matches a namespace prefix,
+// that specific QName should be used in subsequent errors,
+// such as when expecting a closing tag.
+#[test]
+fn empty_element_ns_prefix_invalid_close_contains_matching_qname() {
+ #[derive(Debug, PartialEq, Eq)]
+ struct Foo;
+ impl Object for Foo {}
+
+ ele_parse! {
+ enum Sut;
+ type Object = Foo;
+
+ // This matches `c:*`.
+ Root := NS_C {
+ @ {} => Foo,
+ };
+ }
+
+ let unexpected = QN_C_GT;
+ let span_unexpected = OpenSpan(S2, N);
+
+ let toks = vec![
+ // Just some `c:*`.
+ XirfToken::Open(QN_C_EQ, OpenSpan(S1, N), Depth(0)),
+ // We're not expecting a child.
+ XirfToken::Open(unexpected, span_unexpected, Depth(1)),
+ ];
+
+ let mut sut = Sut::parse(toks.into_iter());
+
+ // The opening tag parses fine,
+ // and the unexpected tag successfully terminates attribute parsing.
+ assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete))); // [Root] Open
+ assert_eq!(sut.next(), Some(Ok(Parsed::Object(Foo)))); // [Root@] Open (>LA)
+
+ // But then consuming the LA will produce an error,
+ // since we were not expecting a child.
+ let err = sut.next().unwrap().unwrap_err();
+ assert_eq!(
+ ParseError::StateError(<Sut as ParseState>::Error::Root(
+ <Root as ParseState>::Error::CloseExpected(
+ // Verify that the error includes the QName that actually matched.
+ QN_C_EQ,
+ OpenSpan(S1, N),
+ XirfToken::Open(unexpected, span_unexpected, Depth(1)),
+ )
+ )),
+ err,
+ );
+}
+
+// Static, aggregate attribute objects.
+#[test]
fn empty_element_with_attr_bindings() {
#[derive(Debug, PartialEq, Eq)]
struct Foo(SymbolId, SymbolId, (Span, Span));
impl Object for Foo {}
+ #[derive(Debug, PartialEq, Eq)]
+ struct AttrVal(Attr);
+
+ impl TryFrom<Attr> for AttrVal {
+ // Type must match AttrValueError on `ele_parse!`
+ type Error = AttrValueError;
+
+ fn try_from(attr: Attr) -> Result<Self, Self::Error> {
+ Ok(AttrVal(attr))
+ }
+ }
+
+ #[derive(Debug, PartialEq)]
+ enum AttrValueError {}
+
+ impl Error for AttrValueError {
+ fn source(&self) -> Option<&(dyn Error + 'static)> {
+ None
+ }
+ }
+
+ impl Display for AttrValueError {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+ write!(f, "test AttrValueError")
+ }
+ }
+
+ impl Diagnostic for AttrValueError {
+ fn describe(&self) -> Vec<crate::diagnose::AnnotatedSpan> {
+ vec![]
+ }
+ }
+
ele_parse! {
+ enum Sut;
+
+ // AttrValueError should be passed to `attr_parse!`
+ // (which is invoked by `ele_parse!`)
+ // as ValueError.
+ type AttrValueError = AttrValueError;
+
type Object = Foo;
// In practice we wouldn't actually use Attr
// (we'd use an appropriate newtype),
// but for the sake of this test we'll keep things simple.
- Sut := QN_PACKAGE {
+ Root := QN_PACKAGE {
@ {
- name: (QN_NAME) => Attr,
- value: (QN_VALUE) => Attr,
+ name: (QN_NAME) => AttrVal,
+ value: (QN_VALUE) => AttrVal,
} => Foo(
- name.value(),
- value.value(),
- (name.attr_span().value_span(), value.attr_span().value_span())
+ name.0.value(),
+ value.0.value(),
+ (name.0.attr_span().value_span(), value.0.attr_span().value_span())
),
- }
+ };
}
let name_val = "bar".into();
@@ -201,17 +374,171 @@ fn empty_element_with_attr_bindings() {
);
}
+// This only tests one scenario under which attribute parsing may fail
+// (others are tested with `attr_parse!`).
+// Failure to parse an attribute is considered a failure at the element
+// level and recovery will skip the entire element.
+#[test]
+fn element_with_failed_attr_parsing() {
+ #[derive(Debug, PartialEq, Eq)]
+ enum Foo {
+ Open,
+ Close,
+ Child,
+ }
+
+ impl crate::parse::Object for Foo {}
+
+ const QN_ROOT: QName = QN_PACKAGE;
+ const QN_CHILD: QName = QN_DIM;
+
+ ele_parse! {
+ enum Sut;
+ type Object = Foo;
+
+ Root := QN_ROOT {
+ @ {
+ _name: (QN_NAME) => Attr,
+ } => Foo::Open,
+
+ // Important to check that this is not emitted.
+ / => Foo::Close,
+ };
+
+ Child := QN_CHILD {
+ @ {} => Foo::Child,
+ };
+ }
+
+ let toks = vec![
+ XirfToken::Open(QN_ROOT, OpenSpan(S1, N), Depth(0)),
+ // Child elements should be ignored.
+ XirfToken::Open(QN_CHILD, OpenSpan(S4, N), Depth(1)),
+ XirfToken::Close(None, CloseSpan::empty(S5), Depth(1)),
+ // Recovery ends at the closing tag.
+ XirfToken::Close(Some(QN_ROOT), CloseSpan::empty(S6), Depth(0)),
+ ];
+
+ let mut sut = Sut::parse(toks.into_iter());
+
+ use Parsed::*;
+
+ // Root will open normally.
+ assert_eq!(sut.next(), Some(Ok(Incomplete))); // [Root] Root Open
+
+ // But the child will result in an error because we have not provided a
+ // required attribute.
+ let err = sut.next().unwrap().unwrap_err();
+ assert_matches!(
+ err,
+ ParseError::StateError(<Sut as ParseState>::Error::Root(
+ <Root as ParseState>::Error::Attrs(..)
+ )),
+ ); // [Root] Child Open (>LA)
+
+ // The remaining tokens should be ignored and we should finish parsing.
+ // Since the opening object was not emitted,
+ // we must not emit the closing.
+ assert_eq!(
+ Ok(vec![
+ Incomplete, // [Root!] Child Open (<LA)
+ Incomplete, // [Root!] Child Close
+ Incomplete, // [Root] Root Close
+ ]),
+ sut.collect(),
+ );
+}
+
+// Rather than using aggregate attributes,
+// `[attr]` allows for dynamic streaming attribute parsing.
+// This is necessary for elements like short-hand template applications.
+#[test]
+fn element_with_streaming_attrs() {
+ #[derive(Debug, PartialEq, Eq)]
+ enum Foo {
+ Open,
+ Attr(Attr),
+ Child,
+ Close,
+ }
+
+ impl crate::parse::Object for Foo {}
+
+ const QN_ROOT: QName = QN_PACKAGE;
+ const QN_CHILD: QName = QN_DIM;
+
+ ele_parse! {
+ enum Sut;
+ type Object = Foo;
+
+ Root := QN_ROOT {
+ // symbol soup
+ @ {} => Foo::Open,
+ / => Foo::Close,
+
+ // This binds all attributes in place of `@ {}` above.
+ [attr](attr) => Foo::Attr(attr),
+
+ Child,
+ };
+
+ Child := QN_CHILD {
+ @ {} => Foo::Child,
+ };
+ }
+
+ let attr1 = Attr(QN_NAME, "one".into(), AttrSpan(S2, S3));
+ let attr2 = Attr(QN_TYPE, "two".into(), AttrSpan(S3, S4));
+
+ let toks = vec![
+ XirfToken::Open(QN_ROOT, OpenSpan(S1, N), Depth(0)),
+ // These attributes should stream,
+ // but only _after_ having emitted the opening object from `@ {}`.
+ XirfToken::Attr(attr1.clone()),
+ XirfToken::Attr(attr2.clone()),
+ // A child should halt attribute parsing just the same as `@ {}`
+ // would without the `[attr]` special form.
+ XirfToken::Open(QN_CHILD, OpenSpan(S5, N), Depth(1)),
+ XirfToken::Close(None, CloseSpan::empty(S6), Depth(1)),
+ XirfToken::Close(Some(QN_ROOT), CloseSpan(S2, N), Depth(0)),
+ ];
+
+ // Unlike other test cases,
+ // rather than attribute parsing yielding a single object,
+ // we will see both the `@ {}` object _and_ individual attributes
+ // from the `[attr]` map.
+ // Since we are not aggregating,
+ // and since streaming attributes must be emitted _after_ the opening
+ // object to ensure proper nesting in the downstream IR,
+ // the `@ {}` object is emitted immediately upon opening instead of
+ // emitting an incomplete parse.
+ use Parsed::*;
+ assert_eq!(
+ Ok(vec![
+ Object(Foo::Open), // [Root] Root Open
+ Object(Foo::Attr(attr1)), // [Root] attr1
+ Object(Foo::Attr(attr2)), // [Root] attr2
+ Incomplete, // [Child] Child Open (<LA)
+ Object(Foo::Child), // [Child@] Child Close (>LA)
+ Incomplete, // [Child] Child Close (<LA)
+ Object(Foo::Close), // [Root] Root Close
+ ]),
+ Sut::parse(toks.into_iter()).collect(),
+ );
+}
+
// An unexpected element produces an error for the offending token and
// then employs a recovery strategy so that parsing may continue.
#[test]
fn unexpected_element() {
ele_parse! {
+ enum Sut;
type Object = ();
- Sut := QN_PACKAGE {
+ Root := QN_PACKAGE {
// symbol soup
@ {} => (),
- }
+ };
}
let unexpected = "unexpected".unwrap_into();
@@ -261,15 +588,21 @@ fn unexpected_element() {
// was encountered
// (which was expected),
// but to the fact that the name was not the one expected.
+ let err = sut.next().unwrap().unwrap_err();
assert_eq!(
- // TODO: This references generated identifiers.
- Some(Err(ParseError::StateError(SutError_::UnexpectedEle_(
- unexpected,
- span.name_span()
- )))),
- sut.next(),
+ ParseError::StateError(<Sut as ParseState>::Error::Root(
+ <Root as ParseState>::Error::UnexpectedEle(
+ unexpected,
+ span.name_span()
+ )
+ )),
+ err,
);
+ // The diagnostic should describe the name of the element as being
+ // invalid.
+ assert_eq!(err.describe()[0].span(), span.name_span());
+
// We should have now entered a recovery mode whereby we discard
// input until we close the element that introduced the error.
assert_eq!(Some(Ok(Parsed::Incomplete)), sut.next()); // Attr
@@ -301,17 +634,18 @@ fn single_child_element() {
impl Object for Foo {}
ele_parse! {
+ enum Sut;
type Object = Foo;
- Sut := QN_PACKAGE {
+ Root := QN_PACKAGE {
@ {} => Foo::RootAttr,
Child,
- }
+ };
Child := QN_CLASSIFY {
@ {} => Foo::ChildAttr,
- }
+ };
}
let toks = vec![
@@ -323,12 +657,60 @@ fn single_child_element() {
assert_eq!(
Ok(vec![
- Parsed::Incomplete, // [Sut] Root Open
- Parsed::Object(Foo::RootAttr), // [Sut@] Child Open (>LA)
+ Parsed::Incomplete, // [Root] Root Open
+ Parsed::Object(Foo::RootAttr), // [Root@] Child Open (>LA)
Parsed::Incomplete, // [Child] Child Open (<LA)
Parsed::Object(Foo::ChildAttr), // [Child@] Child Close (>LA)
Parsed::Incomplete, // [Child] Child Close (<LA)
- Parsed::Incomplete, // [Sut] Root Close
+ Parsed::Incomplete, // [Root] Root Close
+ ]),
+ Sut::parse(toks.into_iter()).collect(),
+ );
+}
+
+// Since all NTs are zero-or-more,
+// we should accept when an expected child is missing
+// (when we receive `Close` instead of an `Open` for the child).
+#[test]
+fn single_child_element_missing() {
+ #[derive(Debug, PartialEq, Eq)]
+ enum Foo {
+ Root,
+ Child,
+ }
+
+ impl Object for Foo {}
+
+ ele_parse! {
+ enum Sut;
+ type Object = Foo;
+
+ Root := QN_PACKAGE {
+ @ {} => Foo::Root,
+
+ // Expected,
+ // but will not be provided.
+ Child,
+ };
+
+ // We never yield this.
+ Child := QN_CLASSIFY {
+ @ {} => Foo::Child,
+ };
+ }
+
+ let toks = vec![
+ XirfToken::Open(QN_PACKAGE, OpenSpan(S1, N), Depth(0)),
+ // Missing child,
+ // which should be okay.
+ XirfToken::Close(Some(QN_PACKAGE), CloseSpan(S4, N), Depth(0)),
+ ];
+
+ assert_eq!(
+ Ok(vec![
+ Parsed::Incomplete, // [Root] Root Open
+ Parsed::Object(Foo::Root), // [Root@] Root Close (<LA)
+ Parsed::Incomplete, // [Root] Root Close (>LA)
]),
Sut::parse(toks.into_iter()).collect(),
);
@@ -353,29 +735,30 @@ fn multiple_child_elements_sequential() {
impl crate::parse::Object for Foo {}
ele_parse! {
+ enum Sut;
type Object = Foo;
- Sut := QN_PACKAGE(ospan) {
+ Root := QN_PACKAGE(_, ospan) {
@ {} => Foo::RootOpen(ospan.span()),
/(cspan) => Foo::RootClose(cspan.span()),
// Order matters here.
ChildA,
ChildB,
- }
+ };
// Demonstrates that span identifier bindings are scoped to the
// nonterminal block
// (so please keep the identifiers the same as above).
- ChildA := QN_CLASSIFY(ospan) {
+ ChildA := QN_CLASSIFY(_, ospan) {
@ {} => Foo::ChildAOpen(ospan.span()),
/(cspan) => Foo::ChildAClose(cspan.span()),
- }
+ };
ChildB := QN_EXPORT {
@ {} => Foo::ChildBOpen,
/ => Foo::ChildBClose,
- }
+ };
}
let toks = vec![
@@ -392,20 +775,117 @@ fn multiple_child_elements_sequential() {
use Parsed::*;
assert_eq!(
Ok(vec![
- Incomplete, // [Sut] Root Open
- Object(Foo::RootOpen(S1)), // [Sut@] ChildA Open (>LA)
+ Incomplete, // [Root] Root Open
+ Object(Foo::RootOpen(S1)), // [Root@] ChildA Open (>LA)
Incomplete, // [ChildA] ChildA Open (<LA)
Object(Foo::ChildAOpen(S2)), // [ChildA@] ChildA Close (>LA)
Object(Foo::ChildAClose(S3)), // [ChildA] ChildA Close (<LA)
Incomplete, // [ChildB] ChildB Open
Object(Foo::ChildBOpen), // [ChildB@] ChildB Close (>LA)
Object(Foo::ChildBClose), // [ChildB] ChildB Close (<LA)
- Object(Foo::RootClose(S5)), // [Sut] Root Close
+ Object(Foo::RootClose(S5)), // [Root] Root Close
]),
Sut::parse(toks.into_iter()).collect(),
);
}
+// Used by below tests.
+fn x_ignored_between_elements(tok: XirfToken<RefinedText>) {
+ #[derive(Debug, PartialEq, Eq)]
+ enum Foo {
+ Root,
+ A,
+ B,
+ }
+
+ impl crate::parse::Object for Foo {}
+
+ const QN_SUT: QName = QN_PACKAGE;
+ const QN_A: QName = QN_CLASSIFY;
+ const QN_B: QName = QN_EXPORT;
+
+ ele_parse! {
+ enum Sut;
+ type Object = Foo;
+
+ Root := QN_SUT {
+ @ {} => Foo::Root,
+
+ A,
+ B,
+ };
+
+ A := QN_A {
+ @ {} => Foo::A,
+ };
+
+ B := QN_B {
+ @ {} => Foo::B,
+ };
+ }
+
+ let toks = vec![
+ // Whitespace before start tag.
+ tok.clone(),
+ XirfToken::Open(QN_SUT, OpenSpan(S1, N), Depth(0)),
+ // Whitespace between children.
+ tok.clone(),
+ XirfToken::Open(QN_A, OpenSpan(S2, N), Depth(1)),
+ XirfToken::Close(None, CloseSpan::empty(S3), Depth(1)),
+ tok.clone(),
+ XirfToken::Open(QN_B, OpenSpan(S3, N), Depth(1)),
+ XirfToken::Close(None, CloseSpan::empty(S4), Depth(1)),
+ tok.clone(),
+ XirfToken::Close(Some(QN_SUT), CloseSpan(S5, N), Depth(0)),
+ // Whitespace after end tag.
+ tok.clone(),
+ ];
+
+ use Parsed::*;
+ assert_eq!(
+ Ok(vec![
+ Incomplete, // [Root] tok
+ Incomplete, // [Root] Root Open
+ Incomplete, // [Root@] tok
+ Object(Foo::Root), // [Root@] A Open (>LA)
+ Incomplete, // [A] A Open (<LA)
+ Object(Foo::A), // [A@] A Close (>LA)
+ Incomplete, // [A] A Close (<LA)
+ Incomplete, // [A] tok
+ Incomplete, // [B] B Open
+ Object(Foo::B), // [B@] B Close (>LA)
+ Incomplete, // [B] B Close (<LA)
+ Incomplete, // [Root] tok
+ Incomplete, // [Root] Root Close
+ Incomplete, // [Root] tok
+ ]),
+ Sut::parse(toks.into_iter()).collect(),
+ );
+}
+
+// Even if we do not accept mixed data
+// (text and elements),
+// whitespace text ought to be accepted and entirely ignored.
+#[test]
+fn whitespace_ignored_between_elements() {
+ x_ignored_between_elements(XirfToken::Text(
+ RefinedText::Whitespace(Whitespace(Text(" ".unwrap_into(), S1))),
+ Depth(0),
+ ));
+}
+
+// Comments have no semantic meaning,
+// and ought not to,
+// because we control the language and can do better.
+#[test]
+fn comments_ignored_between_elements() {
+ x_ignored_between_elements(XirfToken::Comment(
+ "comment".into(),
+ S1,
+ Depth(0),
+ ));
+}
+
// TODO: This error recovery seems to be undesirable,
// both consuming an element and skipping the requirement;
// it is beneficial only in showing that recovery is possible and
@@ -417,18 +897,27 @@ fn multiple_child_elements_sequential() {
fn child_error_and_recovery() {
#[derive(Debug, PartialEq, Eq)]
enum Foo {
- Root,
+ RootOpen,
ChildABad, // Will not yield this one.
ChildB,
+ RootClose,
}
- impl Object for Foo {}
+ impl crate::parse::Object for Foo {}
+
+ const QN_ROOT: QName = QN_PACKAGE;
+ const QN_A: QName = QN_CLASSIFY;
+ const QN_B: QName = QN_EXPORT;
ele_parse! {
+ enum Sut;
type Object = Foo;
- Sut := QN_PACKAGE {
- @ {} => Foo::Root,
+ Root := QN_ROOT {
+ @ {} => Foo::RootOpen,
+
+ // Must be emitted if `RootOpen` is to maintain balance.
+ / => Foo::RootClose,
// This is what we're expecting,
// but not what we will provide.
@@ -437,15 +926,15 @@ fn child_error_and_recovery() {
// But we _will_ provide this expected value,
// after error recovery ignores the above.
ChildB,
- }
+ };
- ChildA := QN_CLASSIFY {
+ ChildA := QN_A {
@ {} => Foo::ChildABad,
- }
+ };
- ChildB := QN_EXPORT {
+ ChildB := QN_B {
@ {} => Foo::ChildB,
- }
+ };
}
let unexpected = "unexpected".unwrap_into();
@@ -453,7 +942,7 @@ fn child_error_and_recovery() {
let toks = vec![
// The first token is the expected root.
- XirfToken::Open(QN_PACKAGE, OpenSpan(S1, N), Depth(0)),
+ XirfToken::Open(QN_ROOT, OpenSpan(S1, N), Depth(0)),
// --> But this one is unexpected (name).
XirfToken::Open(unexpected, span, Depth(1)),
// And so we should ignore it up to this point.
@@ -465,45 +954,58 @@ fn child_error_and_recovery() {
// for `ChildA`,
// which means that we expect `ChildB`.
// Parsing continues normally.
- XirfToken::Open(QN_EXPORT, OpenSpan(S4, N), Depth(1)),
+ XirfToken::Open(QN_B, OpenSpan(S4, N), Depth(1)),
XirfToken::Close(None, CloseSpan::empty(S5), Depth(1)),
- XirfToken::Close(Some(QN_PACKAGE), CloseSpan(S4, N), Depth(0)),
+ XirfToken::Close(Some(QN_ROOT), CloseSpan(S4, N), Depth(0)),
];
let mut sut = Sut::parse(toks.into_iter());
+ use Parsed::*;
+
// The first token is expected,
- // and we enter attribute parsing for `Sut`.
- assert_eq!(Some(Ok(Parsed::Incomplete)), sut.next()); // [Sut] Open 0
+ // and we enter attribute parsing for `Root`.
+ assert_eq!(Some(Ok(Incomplete)), sut.next()); // [Root] Open 0
// The second token _will_ be unexpected,
- // but we're parsing attributes for `Sut`,
+ // but we're parsing attributes for `Root`,
// so we don't know that yet.
// Instead,
// the `Open` ends attribute parsing and yields a token of lookahead.
assert_eq!(
- Some(Ok(Parsed::Object(Foo::Root))), // [Sut@] Open 1 (>LA)
+ Some(Ok(Object(Foo::RootOpen))), // [Root@] Open 1 (>LA)
sut.next()
);
// The token of lookahead (`Open`) is unexpected for `ChildA`,
- // which must throw an error and enter a recovery state.
+ // which then skips to `ChildB`,
+ // which is _also_ not expecting it and must throw an error and enter
+ // a recovery state.
// The token should be consumed and returned in the error,
// _not_ produced as a token of lookahead,
// since we do not want to reprocess bad input.
+ let err = sut.next().unwrap().unwrap_err();
assert_eq!(
- // TODO: This references generated identifiers.
- Some(Err(ParseError::StateError(SutError_::ChildA(
- ChildAError_::UnexpectedEle_(unexpected, span.name_span())
- )))),
- sut.next(),
+ err,
+ ParseError::StateError(<Sut as ParseState>::Error::ChildB(
+ <ChildB as ParseState>::Error::UnexpectedEle(
+ unexpected,
+ span.name_span(),
+ )
+ )),
);
+ // Note: the exact error is asserted precisely above,
+ //   so the diagnostic span can now be verified directly;
+ //   the earlier TODO about unknown error types no longer applies.
+ // Diagnostic message should be delegated to the child.
+ assert_eq!(err.describe()[0].span(), span.name_span());
+
// The next token is the self-closing `Close` for the unexpected opening
// tag.
// Since we are in recovery,
// it should be ignored.
- assert_eq!(Some(Ok(Parsed::Incomplete)), sut.next()); // [ChildA!] Close 1
+ assert_eq!(Some(Ok(Incomplete)), sut.next()); // [ChildA!] Close 1
// Having recovered from the error,
// we should happily accept the remaining tokens starting with
@@ -513,11 +1015,1469 @@ fn child_error_and_recovery() {
// but that's not what we're doing yet.
assert_eq!(
Ok(vec![
- Parsed::Incomplete, // [ChildB] Open 1
- Parsed::Object(Foo::ChildB), // [ChildB@] Close 1 (>LA)
- Parsed::Incomplete, // [ChildB] Close 1 (<LA)
- Parsed::Incomplete, // [Sut] Close 0
+ Incomplete, // [ChildB] Open 1
+ Object(Foo::ChildB), // [ChildB@] Close 1 (>LA)
+ Incomplete, // [ChildB] Close 1 (<LA)
+ Object(Foo::RootClose), // [Root] Close 0
]),
sut.collect()
);
}
+
+// This differs from the above test in that we encounter unexpected elements
+// when we expected to find the end tag.
+// This means that the element _name_ is not in error,
+// but the fact that an element exists _at all_ is.
+#[test]
+fn child_error_and_recovery_at_close() {
+ #[derive(Debug, PartialEq, Eq)]
+ enum Foo {
+ Open,
+ Close,
+ }
+
+ impl Object for Foo {}
+
+ ele_parse! {
+ enum Sut;
+ type Object = Foo;
+
+ Root := QN_PACKAGE {
+ @ {} => Foo::Open,
+ / => Foo::Close,
+ };
+ }
+
+ let unexpected_a = "unexpected a".unwrap_into();
+ let unexpected_b = "unexpected b".unwrap_into();
+ let span_a = OpenSpan(S2, N);
+ let span_b = OpenSpan(S4, N);
+
+ let toks = vec![
+ // The first token is the expected root.
+ XirfToken::Open(QN_PACKAGE, OpenSpan(S1, N), Depth(0)),
+ // Root is now expecting either attributes
+ // (of which there are none),
+ // or a closing element.
+ // In either case,
+ // an opening element is entirely unexpected.
+ XirfToken::Open(unexpected_a, span_a, Depth(1)),
+ // And so we should ignore it up to this point.
+ XirfToken::Close(None, CloseSpan::empty(S3), Depth(1)),
+ // Let's do the same thing again.
+ // It may be ideal to have another error exposed for each individual
+ // element that is unexpected,
+ // but for now the parser is kept simple and we simply continue
+ // to ignore elements until we reach the close.
+ XirfToken::Open(unexpected_b, span_b, Depth(1)),
+ // And so we should ignore it up to this point.
+ XirfToken::Close(None, CloseSpan::empty(S5), Depth(1)),
+ // Let's mix it up a bit with some text and make sure that is
+ // ignored too.
+ XirfToken::Text(
+ RefinedText::Unrefined(Text("unexpected text".unwrap_into(), S5)),
+ Depth(1),
+ ),
+ // Having recovered from the above tokens,
+ // this will end parsing for `Root` as expected.
+ XirfToken::Close(Some(QN_PACKAGE), CloseSpan(S6, N), Depth(0)),
+ ];
+
+ let mut sut = Sut::parse(toks.into_iter());
+
+ // The first token is expected,
+ // and we enter attribute parsing for `Root`.
+ assert_eq!(Some(Ok(Parsed::Incomplete)), sut.next()); // [Root] Open 0
+
+ // The second token _will_ be unexpected,
+ // but we're parsing attributes for `Root`,
+ // so we don't know that yet.
+ // Instead,
+ // the `Open` ends attribute parsing and yields a token of lookahead.
+ assert_eq!(
+ Some(Ok(Parsed::Object(Foo::Open))), // [Root@] Open 1 (>LA)
+ sut.next()
+ );
+
+ // The token of lookahead (`Open`) is unexpected for `Root`,
+ // which is expecting `Close`.
+ // The token should be consumed and returned in the error,
+ // _not_ produced as a token of lookahead,
+ // since we do not want to reprocess bad input.
+ let err = sut.next().unwrap().unwrap_err();
+ assert_eq!(
+ ParseError::StateError(<Sut as ParseState>::Error::Root(
+ <Root as ParseState>::Error::CloseExpected(
+ QN_PACKAGE,
+ OpenSpan(S1, N),
+ XirfToken::Open(unexpected_a, span_a, Depth(1)),
+ )
+ )),
+ err,
+ );
+
+ // The diagnostic information should include a reference to where the
+ // element was opened
+ // (so that the user understands what needs closing),
+ // followed by the span of the token in error
+ // (which naturally comes after the opening tag).
+ let desc = err.describe();
+ assert_eq!(desc[0].span(), S1); // Span of opening tag we want closed
+ assert_eq!(desc[1].span(), span_a.span()); // Span of error
+
+ // The recovery state must not be in an accepting state,
+ // because we didn't close at the root depth yet.
+ let (mut sut, _) =
+ sut.finalize().expect_err("recovery must not be accepting");
+
+ // The next token is the self-closing `Close` for the unexpected opening
+ // tag.
+ // Since we are in recovery,
+ // it should be ignored.
+ assert_eq!(Some(Ok(Parsed::Incomplete)), sut.next()); // [Root!] Close 1
+
+ // We are still in recovery,
+ // and so we should still be ignoring tokens.
+ // It may be more ideal to throw individual errors per unexpected
+ // element
+ // (though doing so may be noisy if there is a lot),
+ // but for now the parser is kept simple.
+ assert_eq!(Some(Ok(Parsed::Incomplete)), sut.next()); // [Root!] Open 1
+ assert_eq!(Some(Ok(Parsed::Incomplete)), sut.next()); // [Root!] Close 1
+ assert_eq!(Some(Ok(Parsed::Incomplete)), sut.next()); // [Root!] Text
+
+ // Having recovered from the error,
+ // we should now be able to close successfully.
+ assert_eq!(Some(Ok(Parsed::Object(Foo::Close))), sut.next());
+ sut.finalize()
+ .expect("recovery must complete in an accepting state");
+}
+
+// A nonterminal of the form `(A | ... | Z)` should accept the element of
+// any of the inner nonterminals.
+#[test]
+fn sum_nonterminal_accepts_any_valid_element() {
+ #[derive(Debug, PartialEq, Eq)]
+ enum Foo {
+ A,
+ B,
+ C,
+ }
+
+ impl crate::parse::Object for Foo {}
+
+ // QNames don't matter as long as they are unique.
+ const QN_A: QName = QN_PACKAGE;
+ const QN_B: QName = QN_CLASSIFY;
+ const QN_C: QName = QN_EXPORT;
+
+ ele_parse! {
+ enum Sut;
+ type Object = Foo;
+
+ Root := (A | B | C);
+
+ A := QN_A {
+ @ {} => Foo::A,
+ };
+
+ B := QN_B {
+ @ {} => Foo::B,
+ };
+
+ C := QN_C {
+ @ {} => Foo::C,
+ };
+ }
+
+ use Parsed::*;
+ use XirfToken::{Close, Open};
+
+ // Try each in turn with a fresh instance of `Root`.
+ [(QN_A, Foo::A), (QN_B, Foo::B), (QN_C, Foo::C)]
+ .into_iter()
+ .for_each(|(qname, obj)| {
+ let toks = vec![
+ Open(qname, OpenSpan(S1, N), Depth(0)),
+ Close(None, CloseSpan::empty(S2), Depth(0)),
+ ];
+
+ assert_eq!(
+ Ok(vec![
+ Incomplete, // [X] Open
+ Object(obj), // [X@] Close (>LA)
+ Incomplete, // [X] Close
+ ]),
+ Sut::parse(toks.into_iter()).collect(),
+ );
+ });
+}
+
+// Whitespace should be accepted around elements.
+fn sum_nonterminal_accepts_x(tok: XirfToken<RefinedText>) {
+ #[derive(Debug, PartialEq, Eq)]
+ enum Foo {
+ A,
+ B,
+ }
+
+ impl crate::parse::Object for Foo {}
+
+ // QNames don't matter as long as they are unique.
+ const QN_A: QName = QN_PACKAGE;
+ const QN_B: QName = QN_CLASSIFY;
+
+ ele_parse! {
+ enum Sut;
+ type Object = Foo;
+
+ // Sum type requires two NTs but we only use A.
+ Root := (A | B);
+
+ A := QN_A {
+ @ {} => Foo::A,
+ };
+
+ B := QN_B {
+ @ {} => Foo::B,
+ };
+ }
+
+ use Parsed::*;
+ use XirfToken::{Close, Open};
+
+ // Surround a single valid element with the ignorable token.
+ let toks = vec![
+ // Leading.
+ tok.clone(),
+ Open(QN_A, OpenSpan(S1, N), Depth(0)),
+ Close(None, CloseSpan::empty(S2), Depth(0)),
+ // Trailing.
+ tok.clone(),
+ ];
+
+ assert_eq!(
+ Ok(vec![
+ Incomplete, // [A] tok
+ Incomplete, // [A] Open
+ Object(Foo::A), // [A@] Close (>LA)
+ Incomplete, // [A] Close
+ Incomplete, // [A] tok
+ ]),
+ Sut::parse(toks.into_iter()).collect(),
+ );
+}
+
+#[test]
+fn sum_nonterminal_accepts_whitespace() {
+ sum_nonterminal_accepts_x(XirfToken::Text(
+ RefinedText::Whitespace(Whitespace(Text(" ".unwrap_into(), S1))),
+ Depth(0),
+ ));
+}
+
+#[test]
+fn sum_nonterminal_accepts_comments() {
+ sum_nonterminal_accepts_x(XirfToken::Comment(
+ "comment".into(),
+ S1,
+ Depth(0),
+ ));
+}
+
+// Compose sum NTs with a parent element.
+#[test]
+fn sum_nonterminal_as_child_element() {
+ #[derive(Debug, PartialEq, Eq)]
+ enum Foo {
+ Open(QName),
+ Close(QName),
+ }
+
+ impl crate::parse::Object for Foo {}
+
+ // QNames don't matter as long as they are unique.
+ const QN_ROOT: QName = QN_PACKAGE;
+ const QN_A: QName = QN_DIM;
+ const QN_B: QName = QN_CLASSIFY;
+ const QN_C: QName = QN_VALUE;
+
+ ele_parse! {
+ enum Sut;
+ type Object = Foo;
+
+ Root := QN_PACKAGE {
+ @ {} => Foo::Open(QN_ROOT),
+ / => Foo::Close(QN_ROOT),
+
+ // A|B followed by a C.
+ AB,
+ C,
+ };
+
+ AB := (A | B);
+
+ A := QN_A {
+ @ {} => Foo::Open(QN_A),
+ / => Foo::Close(QN_A),
+ };
+
+ B := QN_B {
+ @ {} => Foo::Open(QN_B),
+ / => Foo::Close(QN_B),
+ };
+
+ C := QN_C {
+ @ {} => Foo::Open(QN_C),
+ / => Foo::Close(QN_C),
+ };
+ }
+
+ let toks = vec![
+ XirfToken::Open(QN_ROOT, OpenSpan(S1, N), Depth(0)),
+ // A
+ XirfToken::Open(QN_A, OpenSpan(S2, N), Depth(1)),
+ XirfToken::Close(None, CloseSpan::empty(S3), Depth(1)),
+ // C
+ XirfToken::Open(QN_C, OpenSpan(S3, N), Depth(1)),
+ XirfToken::Close(None, CloseSpan::empty(S4), Depth(1)),
+ XirfToken::Close(Some(QN_ROOT), CloseSpan(S5, N), Depth(0)),
+ ];
+
+ use Parsed::*;
+
+ assert_eq!(
+ Ok(vec![
+ Incomplete, // [Root] Root Open
+ Object(Foo::Open(QN_ROOT)), // [Root@] A Open (>LA)
+ Incomplete, // [A] A Open (<LA)
+ Object(Foo::Open(QN_A)), // [A@] A Close (>LA)
+ Object(Foo::Close(QN_A)), // [A] A Close (<LA)
+ Incomplete, // [C] C Open
+ Object(Foo::Open(QN_C)), // [C@] C Close (>LA)
+ Object(Foo::Close(QN_C)), // [C] C Close (<LA)
+ Object(Foo::Close(QN_ROOT)), // [Root] Root Close
+ ]),
+ Sut::parse(toks.into_iter()).collect(),
+ );
+}
+
+#[test]
+fn sum_nonterminal_error_recovery() {
+ #[derive(Debug, PartialEq, Eq)]
+ enum Foo {
+ A,
+ B,
+ }
+
+ impl crate::parse::Object for Foo {}
+
+ // QNames don't matter as long as they are unique.
+ const QN_A: QName = QN_PACKAGE;
+ const QN_B: QName = QN_CLASSIFY;
+ let unexpected: QName = "unexpected".unwrap_into();
+
+ ele_parse! {
+ enum Sut;
+ type Object = Foo;
+
+ Root := (A | B);
+
+ A := QN_A {
+ @ {} => Foo::A,
+ };
+
+ B := QN_B {
+ @ {} => Foo::B,
+ };
+ }
+
+ // Something >0 just to assert that we're actually paying attention to
+ // it when consuming tokens during recovery.
+ let depth = Depth(5);
+ let depth_child = Depth(6);
+
+ // An extra token to yield after we're done parsing to ensure that we
+ // properly yield a dead state transition.
+ let dead_tok = XirfToken::Open(QN_NAME, OpenSpan(S5, N), depth);
+
+ let toks = vec![
+ // Neither A nor B,
+ // which will produce an error and enter recovery.
+ XirfToken::Open(unexpected, OpenSpan(S1, N), depth),
+ // A child element to be ignored,
+ // to ensure that its closing tag will not halt recovery
+ // prematurely.
+ // This further tests that it's too late to provide a valid opening
+ // token
+ // (which is good because we're not at the right depth).
+ XirfToken::Open(QN_A, OpenSpan(S2, N), depth_child),
+ XirfToken::Close(None, CloseSpan::empty(S3), depth_child),
+ // Closing token for the bad element at the corresponding depth,
+ // which will end recovery.
+ XirfToken::Close(Some(unexpected), CloseSpan(S4, N), depth),
+ // Should result in a dead state post-recovery,
+ // just as we would expect if we _didn't_ recover.
+ dead_tok.clone(),
+ ];
+
+ let mut sut = Sut::parse(toks.into_iter());
+
+ // The first token of input is the unexpected element,
+ // and so should result an error.
+ // The referenced span should be the _name_ of the element,
+ // not the tag,
+ // since the error is referring not to the fact that an element
+ // was encountered
+ // (which was expected),
+ // but to the fact that the name was not the one expected.
+ let err = sut.next().unwrap().unwrap_err();
+ assert_eq!(
+ err,
+ ParseError::StateError(<Sut as ParseState>::Error::Root(
+ <Root as ParseState>::Error::UnexpectedEle(
+ unexpected,
+ OpenSpan(S1, N).name_span(),
+ Default::default(),
+ )
+ )),
+ );
+
+ // Diagnostic message should describe the name of the element.
+ assert_eq!(err.describe()[0].span(), OpenSpan(S1, N).name_span());
+
+ // We should have now entered a recovery mode whereby we discard
+ // input until we close the element that introduced the error.
+ assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete))); // Open child
+ assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete))); // Close child
+
+ // The recovery state must not be in an accepting state,
+ // because we didn't close at the root depth yet.
+ let (mut sut, _) =
+ sut.finalize().expect_err("recovery must not be accepting");
+
+ // The next token should close the element that is in error,
+ // and bring us into an accepting state.
+ // But since we are not emitting tokens,
+ // we'll still be marked as incomplete.
+ assert_eq!(Some(Ok(Parsed::Incomplete)), sut.next()); // Close root
+
+ // Encountering any tokens post-recovery should result in a dead state
+ // just the same as if we had closed normally.
+ let err = sut.next().unwrap().unwrap_err();
+ assert_matches!(
+ err,
+ ParseError::UnexpectedToken(given_tok, _) if given_tok == dead_tok,
+ );
+
+ // Having otherwise completed successfully,
+ // and now yielding dead states,
+ // we must indicate that parsing has completed successfully so that
+ // the caller knows that it can safely move on.
+ sut.finalize()
+ .expect("recovery must complete in an accepting state");
+}
+
+#[test]
+fn child_repetition() {
+ #[derive(Debug, PartialEq, Eq)]
+ enum Foo {
+ RootOpen,
+ ChildOpen(QName),
+ ChildClose(QName),
+ RootClose,
+ }
+
+ impl crate::parse::Object for Foo {}
+
+ const QN_ROOT: QName = QN_PACKAGE;
+ const QN_A: QName = QN_DIM;
+ const QN_B: QName = QN_CLASSIFY;
+ const QN_C: QName = QN_EXPORT;
+
+ ele_parse! {
+ enum Sut;
+ type Object = Foo;
+
+ Root := QN_PACKAGE {
+ @ {} => Foo::RootOpen,
+ / => Foo::RootClose,
+
+ ChildA,
+ ChildB,
+ ChildC,
+ };
+
+ ChildA := QN_A {
+ @ {} => Foo::ChildOpen(QN_A),
+ / => Foo::ChildClose(QN_A),
+ };
+
+ ChildB := QN_B {
+ @ {} => Foo::ChildOpen(QN_B),
+ / => Foo::ChildClose(QN_B),
+ };
+
+ ChildC := QN_C {
+ @ {} => Foo::ChildOpen(QN_C),
+ / => Foo::ChildClose(QN_C),
+ };
+ }
+
+ let toks = vec![
+ XirfToken::Open(QN_ROOT, OpenSpan(S1, N), Depth(0)),
+ // ChildA (1)
+ XirfToken::Open(QN_A, OpenSpan(S2, N), Depth(1)),
+ XirfToken::Close(None, CloseSpan::empty(S3), Depth(1)),
+ // ChildA (2)
+ XirfToken::Open(QN_A, OpenSpan(S3, N), Depth(1)),
+ XirfToken::Close(None, CloseSpan::empty(S4), Depth(1)),
+ // ChildB (1)
+ XirfToken::Open(QN_B, OpenSpan(S4, N), Depth(1)),
+ XirfToken::Close(None, CloseSpan::empty(S5), Depth(1)),
+ // ChildB (2)
+ XirfToken::Open(QN_B, OpenSpan(S5, N), Depth(1)),
+ XirfToken::Close(None, CloseSpan::empty(S6), Depth(1)),
+ // ChildC (only)
+ XirfToken::Open(QN_C, OpenSpan(S6, N), Depth(1)),
+ XirfToken::Close(None, CloseSpan::empty(S7), Depth(1)),
+ XirfToken::Close(Some(QN_ROOT), CloseSpan(S8, N), Depth(0)),
+ ];
+
+ use Parsed::*;
+
+ // Note that we cannot observe the handoff after the repeating parsers
+ // below because Parser immediately recurs.
+ // For example,
+ // when ChildA has been closed,
+ // it awaits the next token to see if it should reset or if it should
+ // emit a dead state.
+ // If it receives `QN_A`,
+ // then it'll reset.
+ // However,
+ // `QN_B` will cause it to emit `dead` with the `Open` token as
+ // lookahead,
+ // which then gets turned into `Incomplete` with lookahead by
+ // `ParseState::delegate`,
+ // which then causes `Parser` to immediately recur,
+ // masking the `Incomplete` entirely.
+ // And so what we see below is a cleaner,
+ // albeit not entirely honest,
+ // script.
+ //
+ // (Also please note that the above description is true as of the time
+ // of writing,
+ // but it's possible that this comment has not been updated since
+ // then.)
+ assert_eq!(
+ Ok(vec![
+ Incomplete, // [Root] Root Open
+ Object(Foo::RootOpen), // [Root@] ChildA Open (>LA)
+ Incomplete, // [ChildA] ChildA Open (<LA)
+ Object(Foo::ChildOpen(QN_A)), // [ChildA@] ChildA Close (>LA)
+ Object(Foo::ChildClose(QN_A)), // [ChildA] ChildA Close (<LA)
+ Incomplete, // [ChildA] ChildA Open (<LA)
+ Object(Foo::ChildOpen(QN_A)), // [ChildA@] ChildA Close (>LA)
+ Object(Foo::ChildClose(QN_A)), // [ChildA] ChildA Close (<LA)
+ Incomplete, // [ChildB] ChildB Open (<LA)
+ Object(Foo::ChildOpen(QN_B)), // [ChildB@] ChildB Close (>LA)
+ Object(Foo::ChildClose(QN_B)), // [ChildB] ChildB Close (<LA)
+ Incomplete, // [ChildB] ChildB Open (<LA)
+ Object(Foo::ChildOpen(QN_B)), // [ChildB@] ChildB Close (>LA)
+ Object(Foo::ChildClose(QN_B)), // [ChildB] ChildB Close (<LA)
+ Incomplete, // [ChildC] ChildC Open (<LA)
+ Object(Foo::ChildOpen(QN_C)), // [ChildC@] ChildC Close (>LA)
+ Object(Foo::ChildClose(QN_C)), // [ChildC] ChildC Close (<LA)
+ Object(Foo::RootClose), // [Root] Root Close
+ ]),
+ Sut::parse(toks.into_iter()).collect(),
+ );
+}
+
+// Once we transition `(S) -> (S')`,
+//   we should not be able to transition back under any circumstance.
+// Concretely:
+//   `Root` sequences the child NTs `A` then `B`;
+//     after any `B` has been parsed,
+//     a subsequent `A` must be rejected rather than re-entering `A`'s
+//     state.
+#[test]
+fn child_nt_sequence_no_prev_after_next() {
+    // Object yielded by the generated parser;
+    //   carries the element QName so assertions can tell the children
+    //   apart.
+    #[derive(Debug, PartialEq, Eq)]
+    enum Foo {
+        Open(QName),
+        Close(QName),
+    }
+
+    impl crate::parse::Object for Foo {}
+
+    // QNames are arbitrary;
+    //   they need only be distinct from one another.
+    const QN_ROOT: QName = QN_PACKAGE;
+    const QN_A: QName = QN_DIM;
+    const QN_B: QName = QN_CLASSIFY;
+
+    ele_parse! {
+        enum Sut;
+        type Object = Foo;
+
+        // `A` precedes `B` in this child list;
+        //   the generated parser must enforce that ordering.
+        Root := QN_ROOT {
+            @ {} => Foo::Open(QN_ROOT),
+            / => Foo::Close(QN_ROOT),
+
+            A,
+            B,
+        };
+
+        A := QN_A {
+            @ {} => Foo::Open(QN_A),
+            / => Foo::Close(QN_A),
+        };
+
+        B := QN_B {
+            @ {} => Foo::Open(QN_B),
+            / => Foo::Close(QN_B),
+        };
+    }
+
+    // Token stream: A, A, B, B, then an out-of-order A.
+    // Each child element is self-closing
+    //   (`Close(None, ...)` with an empty span).
+    let toks = vec![
+        XirfToken::Open(QN_ROOT, OpenSpan(S1, N), Depth(0)),
+        // A
+        XirfToken::Open(QN_A, OpenSpan(S2, N), Depth(1)),
+        XirfToken::Close(None, CloseSpan::empty(S2), Depth(1)),
+        // A -> A OK
+        XirfToken::Open(QN_A, OpenSpan(S3, N), Depth(1)),
+        XirfToken::Close(None, CloseSpan::empty(S3), Depth(1)),
+        // A -> B
+        XirfToken::Open(QN_B, OpenSpan(S4, N), Depth(1)),
+        XirfToken::Close(None, CloseSpan::empty(S4), Depth(1)),
+        // B -> B OK
+        XirfToken::Open(QN_B, OpenSpan(S4, N), Depth(1)),
+        XirfToken::Close(None, CloseSpan::empty(S4), Depth(1)),
+        // B -> A _not_ OK.
+        XirfToken::Open(QN_A, OpenSpan(S6, N), Depth(1)),
+        XirfToken::Close(None, CloseSpan::empty(S6), Depth(1)),
+        XirfToken::Close(Some(QN_ROOT), CloseSpan(S8, N), Depth(0)),
+    ];
+
+    use Parsed::*;
+
+    // The trailing `A` is attributed to `B`
+    //   (the active NT at that point)
+    //   as an `UnexpectedEle` error;
+    //   recovery then skips the matching `Close` (`Incomplete`) and
+    //   `Root` still closes cleanly.
+    assert_eq!(
+        vec![
+            Ok(Incomplete),                 // [Root]  Root Open
+            Ok(Object(Foo::Open(QN_ROOT))), // [Root@] A Open (>LA)
+            Ok(Incomplete),                 // [A]     A Open (<LA)
+            Ok(Object(Foo::Open(QN_A))),    // [A@]    A Close (>LA)
+            Ok(Object(Foo::Close(QN_A))),   // [A]     A Close (<LA)
+            Ok(Incomplete),                 // [A]     A Open (<LA)
+            Ok(Object(Foo::Open(QN_A))),    // [A@]    A Close (>LA)
+            Ok(Object(Foo::Close(QN_A))),   // [A]     A Close (<LA)
+            Ok(Incomplete),                 // [B]     B Open (<LA)
+            Ok(Object(Foo::Open(QN_B))),    // [B@]    B Close (>LA)
+            Ok(Object(Foo::Close(QN_B))),   // [B]     B Close (<LA)
+            Ok(Incomplete),                 // [B]     B Open (<LA)
+            Ok(Object(Foo::Open(QN_B))),    // [B@]    B Close (>LA)
+            Ok(Object(Foo::Close(QN_B))),   // [B]     B Close (<LA)
+            Err(ParseError::StateError(<Sut as ParseState>::Error::B(
+                <B as ParseState>::Error::UnexpectedEle(
+                    QN_A,
+                    OpenSpan(S6, N).name_span()
+                )
+            ))), // [B!]  A Open
+            Ok(Incomplete),                  // [B!]   A Close
+            Ok(Object(Foo::Close(QN_ROOT))), // [Root] Root Close
+        ],
+        Sut::parse(toks.into_iter()).collect::<Vec<ParsedResult<Sut>>>(),
+    );
+}
+
+// A repeating child NT that encounters an element it does not recognize
+//   must fail on the child itself
+//   (via the dead-state/lookahead mechanism described inline below),
+//   recover by skipping the offending element,
+//   and still leave the parser in an accepting state.
+#[test]
+fn child_repetition_invalid_tok_dead() {
+    // Parser output object;
+    //   no payload is needed since only one child QName is in play.
+    #[derive(Debug, PartialEq, Eq)]
+    enum Foo {
+        RootOpen,
+        ChildOpen,
+        ChildClose,
+        RootClose,
+    }
+
+    impl crate::parse::Object for Foo {}
+
+    // QNames don't matter as long as they are unique.
+    const QN_ROOT: QName = QN_PACKAGE;
+    const QN_CHILD: QName = QN_DIM;
+    let unexpected: QName = "unexpected".unwrap_into();
+
+    ele_parse! {
+        enum Sut;
+        type Object = Foo;
+
+        // NOTE(review): this says `QN_PACKAGE` where the sibling tests
+        //   use the local alias (here `QN_ROOT`);
+        //   they are the same QName,
+        //     but the alias would be more consistent.
+        Root := QN_PACKAGE {
+            @ {} => Foo::RootOpen,
+            / => Foo::RootClose,
+
+            Child,
+        };
+
+        Child := QN_CHILD {
+            @ {} => Foo::ChildOpen,
+            / => Foo::ChildClose,
+        };
+    }
+
+    // One valid self-closing `Child`,
+    //   then a self-closing element the grammar does not know.
+    let toks = vec![
+        XirfToken::Open(QN_ROOT, OpenSpan(S1, N), Depth(0)),
+        // Child (success)
+        XirfToken::Open(QN_CHILD, OpenSpan(S2, N), Depth(1)),
+        XirfToken::Close(None, CloseSpan::empty(S3), Depth(1)),
+        // unexpected
+        XirfToken::Open(unexpected, OpenSpan(S2, N), Depth(1)),
+        XirfToken::Close(None, CloseSpan::empty(S3), Depth(1)),
+        XirfToken::Close(Some(QN_ROOT), CloseSpan(S8, N), Depth(0)),
+    ];
+
+    let mut sut = Sut::parse(toks.into_iter());
+
+    use Parsed::*;
+
+    // Step the parser one token at a time so the error position can be
+    //   pinpointed.
+    let mut next = || sut.next();
+
+    assert_eq!(next(), Some(Ok(Incomplete))); // [Root] Open
+    assert_eq!(next(), Some(Ok(Object(Foo::RootOpen)))); // [Root@] Open >
+    assert_eq!(next(), Some(Ok(Incomplete))); // [Child] Open <
+    assert_eq!(next(), Some(Ok(Object(Foo::ChildOpen)))); // [Child@] Close >
+    assert_eq!(next(), Some(Ok(Object(Foo::ChildClose)))); // [Child] Close <
+
+    // Intuitively,
+    //   we may want to enter recovery and ignore the element.
+    // But the problem is that we need to emit a dead state so that other
+    //   parsers can handle the input,
+    //     because it may simply be the case that our repetition is over.
+    //
+    // Given that dead state and token of lookahead,
+    //   `Parser` will immediately recurse to re-process the erroneous
+    //   `Open`.
+    // The next state after the `Child` NT is expecting a `Close`,
+    //   but upon encountering a `Open` it forces the last NT to perform the
+    //   processing,
+    //     and so the error will occur on `Child`.
+    assert_eq!(
+        next(),
+        Some(Err(ParseError::StateError(
+            <Sut as ParseState>::Error::Child(
+                <Child as ParseState>::Error::UnexpectedEle(
+                    unexpected,
+                    OpenSpan(S2, N).name_span()
+                )
+            )
+        ))),
+    );
+
+    // This next token is also ignored as part of recovery.
+    assert_eq!(next(), Some(Ok(Incomplete))); // [Root] Child Close
+
+    // Finally,
+    //   `Root` encounters its expected `Close` and ends recovery.
+    assert_eq!(next(), Some(Ok(Object(Foo::RootClose)))); // [Root] Close
+    sut.finalize()
+        .expect("recovery must complete in an accepting state");
+}
+
+// Repetition on a nonterminal of the form `(A | ... | Z)` will allow any
+// number of `A` through `Z` in any order.
+// This is similar to the above test.
+#[test]
+fn sum_repetition() {
+ #[derive(Debug, PartialEq, Eq)]
+ enum Foo {
+ Open(QName),
+ Close(QName),
+ }
+
+ impl crate::parse::Object for Foo {}
+
+ const QN_ROOT: QName = QN_PACKAGE;
+ const QN_A: QName = QN_DIM;
+ const QN_B: QName = QN_CLASSIFY;
+ const QN_C: QName = QN_EXPORT;
+
+ ele_parse! {
+ enum Sut;
+ type Object = Foo;
+
+ Root := QN_PACKAGE {
+ @ {} => Foo::Open(QN_ROOT),
+ / => Foo::Close(QN_ROOT),
+
+ // A|B|C in any order,
+ // any number of times.
+ ABC,
+ };
+
+ ABC := (A | B | C );
+
+ A := QN_A {
+ @ {} => Foo::Open(QN_A),
+ / => Foo::Close(QN_A),
+ };
+
+ B := QN_B {
+ @ {} => Foo::Open(QN_B),
+ / => Foo::Close(QN_B),
+ };
+
+ C := QN_C {
+ @ {} => Foo::Open(QN_C),
+ / => Foo::Close(QN_C),
+ };
+ }
+
+ let toks = vec![
+ XirfToken::Open(QN_ROOT, OpenSpan(S1, N), Depth(0)),
+ // A (1)
+ XirfToken::Open(QN_A, OpenSpan(S1, N), Depth(1)),
+ XirfToken::Close(None, CloseSpan::empty(S2), Depth(1)),
+ // A (2)
+ XirfToken::Open(QN_A, OpenSpan(S2, N), Depth(1)),
+ XirfToken::Close(None, CloseSpan::empty(S3), Depth(1)),
+ // B (1)
+ XirfToken::Open(QN_B, OpenSpan(S3, N), Depth(1)),
+ XirfToken::Close(None, CloseSpan::empty(S4), Depth(1)),
+ // C (1)
+ XirfToken::Open(QN_C, OpenSpan(S4, N), Depth(1)),
+ XirfToken::Close(None, CloseSpan::empty(S5), Depth(1)),
+ // B (2)
+ XirfToken::Open(QN_B, OpenSpan(S5, N), Depth(1)),
+ XirfToken::Close(None, CloseSpan::empty(S6), Depth(1)),
+ XirfToken::Close(Some(QN_ROOT), CloseSpan(S7, N), Depth(0)),
+ ];
+
+ use Parsed::*;
+
+ // See notes on preceding repetition test `child_repetition` regarding
+ // the suppression of `Incomplete` for dead states.
+ assert_eq!(
+ Ok(vec![
+ Incomplete, // [Root] Root Open
+ Object(Foo::Open(QN_ROOT)), // [Root@] A Open (>LA)
+ Incomplete, // [A] A Open (<LA)
+ Object(Foo::Open(QN_A)), // [A@] A Close (>LA)
+ Object(Foo::Close(QN_A)), // [A] A Close (<LA)
+ Incomplete, // [A] A Open
+ Object(Foo::Open(QN_A)), // [A@] A Close (>LA)