Mike Gerwitz

Activist for User Freedom

aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--tamer/src/xir/flat.rs2
-rw-r--r--tamer/src/xir/fmt.rs6
-rw-r--r--tamer/src/xir/parse/ele.rs221
-rw-r--r--tamer/src/xir/parse/ele/test.rs150
4 files changed, 369 insertions, 10 deletions
diff --git a/tamer/src/xir/flat.rs b/tamer/src/xir/flat.rs
index e2773bc..0b62057 100644
--- a/tamer/src/xir/flat.rs
+++ b/tamer/src/xir/flat.rs
@@ -57,7 +57,7 @@ use std::{error::Error, fmt::Display};
/// Tag nesting depth
/// (`0` represents the root).
-#[derive(Debug, Clone, PartialEq, Eq)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Depth(pub usize);
impl Display for Depth {
diff --git a/tamer/src/xir/fmt.rs b/tamer/src/xir/fmt.rs
index 620b775..a7dd305 100644
--- a/tamer/src/xir/fmt.rs
+++ b/tamer/src/xir/fmt.rs
@@ -19,7 +19,7 @@
//! XIR formatting types for use with [`crate::fmt`]
-use crate::fmt::{AndQualConjList, Delim, Prefix, Raw, Tt};
+use crate::fmt::{AndQualConjList, Delim, OrQualConjList, Prefix, Raw, Tt};
/// Denote an XML attribute by prefixing the value with `@`.
pub type XmlAttr = Prefix<"@", Raw>;
@@ -33,3 +33,7 @@ pub type OpenXmlEle = Delim<"<", ">", Raw>;
/// Opening tag for XML element as teletypewriter
/// (for use in sentences).
pub type TtOpenXmlEle = Tt<OpenXmlEle>;
+
+/// A choice of a list of opening XML tags.
+pub type OpenEleSumList =
+ OrQualConjList<"element", "one of elements", TtOpenXmlEle>;
diff --git a/tamer/src/xir/parse/ele.rs b/tamer/src/xir/parse/ele.rs
index ec48012..8200a6e 100644
--- a/tamer/src/xir/parse/ele.rs
+++ b/tamer/src/xir/parse/ele.rs
@@ -19,6 +19,11 @@
//! Element parser generator for parsing of [XIRF](super::super::flat).
+use crate::parse::ParseState;
+
+/// A parser accepting a single element.
+pub trait EleParseState: ParseState {}
+
#[macro_export]
macro_rules! ele_parse {
(type Object = $objty:ty; $($rest:tt)*) => {
@@ -41,10 +46,13 @@ macro_rules! ele_parse {
}
};
- (@!nonterm_def <$objty:ty> $nt:ident ($ntreffirst:ident $(| $ntref:ident)+), $($rest:tt)*) => {
- ele_parse!(@!ele_dfn_sum $nt [$ntfirst $($nt)*]);
+ (@!nonterm_def <$objty:ty> $nt:ident
+ ($ntref_first:ident $(| $ntref:ident)+); $($rest:tt)*
+ ) => {
+ ele_parse!(@!ele_dfn_sum <$objty> $nt [$ntref_first $($ntref)*]);
ele_parse! {
+ type Object = $objty;
$($rest)*
}
};
@@ -124,7 +132,6 @@ macro_rules! ele_parse {
)*
}
) => {
- // TODO
paste::paste! {
crate::attr_parse! {
struct [<$nt AttrsState_>] -> [<$nt Attrs_>] {
@@ -148,6 +155,8 @@ macro_rules! ele_parse {
/// Recovery state ignoring all remaining tokens for this
/// element.
RecoverEleIgnore_(crate::xir::QName, crate::xir::OpenSpan, Depth),
+ // Recovery completed because end tag corresponding to the
+ // invalid element has been found.
RecoverEleIgnoreClosed_(crate::xir::QName, crate::xir::CloseSpan),
/// Parsing element attributes.
Attrs_([<$nt AttrsState_>]),
@@ -160,6 +169,17 @@ macro_rules! ele_parse {
Closed_(crate::span::Span),
}
+ impl crate::xir::parse::ele::EleParseState for $nt {}
+
+ impl $nt {
+ /// [`QName`](crate::xir::QName) of the element recognized
+ /// by this parser.
+ #[allow(dead_code)] // used by sum parser
+ const fn qname() -> crate::xir::QName {
+ $qname
+ }
+ }
+
impl std::fmt::Display for $nt {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
use crate::{
@@ -366,12 +386,199 @@ macro_rules! ele_parse {
}
};
- (@!ele_dfn_sum $nt:ident [$($ntref:ident)*]) => {
- #[derive(Debug, PartialEq, Eq)]
- enum $nt {
+ (@!ele_dfn_sum <$objty:ty> $nt:ident [$($ntref:ident)*]) => {
+ $(
+ // Provide a (hopefully) helpful error that can be corrected
+ // rather than any obscure errors that may follow from trying
+ // to compose parsers that were not generated with this macro.
+ assert_impl_all!($ntref: crate::xir::parse::ele::EleParseState);
+ )*
+
+ paste::paste! {
+ #[doc=concat!(
+ "Parser expecting one of ",
+ $("[`", stringify!($ntref), "`], ",)*
+ "."
+ )]
+ #[derive(Debug, PartialEq, Eq, Default)]
+ enum $nt {
+ #[default]
+ Expecting_,
+ /// Recovery state ignoring all remaining tokens for this
+ /// element.
+ RecoverEleIgnore_(crate::xir::QName, crate::xir::OpenSpan, Depth),
+ RecoverEleIgnoreClosed_(crate::xir::QName, crate::xir::CloseSpan),
+ $(
+ $ntref($ntref),
+ )*
+ }
+
+ impl std::fmt::Display for $nt {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+ use crate::{
+ fmt::{DisplayWrapper, ListDisplayWrapper, TtQuote},
+ xir::fmt::OpenEleSumList,
+ };
+
+ let ntrefs = [
+ $(
+ $ntref::qname(),
+ )*
+ ];
+ let expected = OpenEleSumList::wrap(&ntrefs);
+
+ match self {
+ Self::Expecting_ => {
+ write!(f, "expecting {expected}")
+ },
+
+ Self::RecoverEleIgnore_(name, ..)
+ | Self::RecoverEleIgnoreClosed_(name, ..) => write!(
+ f,
+ "attempting to recover by ignoring element \
+ with unexpected name {given} \
+ (expected {expected})",
+ given = TtQuote::wrap(name),
+ ),
+
+ $(
+ Self::$ntref(st) => std::fmt::Display::fmt(st, f),
+ )*
+ }
+ }
+ }
+
+ #[derive(Debug, PartialEq)]
+ enum [<$nt Error_>] {
+ UnexpectedEle_(crate::xir::QName, crate::span::Span),
+ $(
+ $ntref([<$ntref Error_>]),
+ )*
+ }
+
$(
- $ntref($ntref),
+ impl From<[<$ntref Error_>]> for [<$nt Error_>] {
+ fn from(e: [<$ntref Error_>]) -> Self {
+ [<$nt Error_>]::$ntref(e)
+ }
+ }
)*
+
+ impl std::error::Error for [<$nt Error_>] {
+ fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+ // TODO
+ None
+ }
+ }
+
+ impl std::fmt::Display for [<$nt Error_>] {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+ use crate::{
+ fmt::DisplayWrapper,
+ xir::fmt::TtOpenXmlEle,
+ };
+
+ match self {
+ Self::UnexpectedEle_(qname, _) => {
+ write!(f, "unexpected {}", TtOpenXmlEle::wrap(qname))
+ },
+ $(
+ Self::$ntref(e) => std::fmt::Display::fmt(e, f),
+ )*
+ }
+ }
+ }
+
+ impl crate::diagnose::Diagnostic for [<$nt Error_>] {
+ fn describe(&self) -> Vec<crate::diagnose::AnnotatedSpan> {
+ todo!()
+ }
+ }
+
+ impl crate::parse::ParseState for $nt {
+ type Token = crate::xir::flat::XirfToken;
+ type Object = $objty;
+ type Error = [<$nt Error_>];
+
+ fn parse_token(
+ self,
+ tok: Self::Token,
+ _: crate::parse::NoContext,
+ ) -> crate::parse::TransitionResult<Self> {
+ use crate::{
+ parse::{EmptyContext, Transition},
+ xir::flat::XirfToken,
+ };
+
+ use $nt::{Expecting_, RecoverEleIgnore_, RecoverEleIgnoreClosed_};
+
+ match (self, tok) {
+ $(
+ (
+ Expecting_,
+ XirfToken::Open(qname, span, depth)
+ ) if qname == $ntref::qname() => {
+ $ntref::default().delegate(
+ XirfToken::Open(qname, span, depth),
+ EmptyContext,
+ |si| Transition(Self::$ntref(si)),
+ || todo!("inner dead (should not happen here)"),
+ )
+ },
+ )*
+
+ (Expecting_, XirfToken::Open(qname, span, depth)) => {
+ Transition(RecoverEleIgnore_(qname, span, depth)).err(
+ // Use name span rather than full `OpenSpan`
+ // since it's specifically the name that
+ // was unexpected,
+ // not the fact that it's an element.
+ [<$nt Error_>]::UnexpectedEle_(qname, span.name_span())
+ )
+ },
+
+ // XIRF ensures that the closing tag matches the opening,
+ // so we need only check depth.
+ (
+ RecoverEleIgnore_(qname, _, depth_open),
+ XirfToken::Close(_, span, depth_close)
+ ) if depth_open == depth_close => {
+ Transition(RecoverEleIgnoreClosed_(qname, span)).incomplete()
+ },
+
+ (st @ RecoverEleIgnore_(..), _) => {
+ Transition(st).incomplete()
+ },
+
+ $(
+ (Self::$ntref(si), tok) => si.delegate(
+ tok,
+ EmptyContext,
+ |si| Transition(Self::$ntref(si)),
+ || todo!("inner dead"),
+ ),
+ )*
+
+ todo => todo!("sum {todo:?}"),
+ }
+ }
+
+ fn is_accepting(&self) -> bool {
+ match self {
+ Self::RecoverEleIgnoreClosed_(..) => true,
+
+ // Delegate entirely to the inner ParseState.
+ // It is desirable to maintain this state even after
+ // the inner parser is completed so that the inner
+ // state can accurately describe what took place.
+ $(
+ Self::$ntref(si) => si.is_accepting(),
+ )*
+
+ _ => false,
+ }
+ }
+ }
}
};
}
diff --git a/tamer/src/xir/parse/ele/test.rs b/tamer/src/xir/parse/ele/test.rs
index fbbe658..79a96c9 100644
--- a/tamer/src/xir/parse/ele/test.rs
+++ b/tamer/src/xir/parse/ele/test.rs
@@ -34,7 +34,7 @@ use crate::{
attr::{Attr, AttrSpan},
flat::{Depth, XirfToken},
st::qname::*,
- CloseSpan, EleNameLen, EleSpan, OpenSpan,
+ CloseSpan, EleNameLen, EleSpan, OpenSpan, QName,
},
};
@@ -521,3 +521,151 @@ fn child_error_and_recovery() {
sut.collect()
);
}
+
+// A nonterminal of the form `(A | ... | Z)` should accept the element of
+// any of the inner nonterminals.
+#[test]
+fn sum_nonterminal_accepts_any_valid_element() {
+ #[derive(Debug, PartialEq, Eq)]
+ enum Foo {
+ A,
+ B,
+ C,
+ }
+
+ impl crate::parse::Object for Foo {}
+
+ // QNames don't matter as long as they are unique.
+ const QN_A: QName = QN_PACKAGE;
+ const QN_B: QName = QN_CLASSIFY;
+ const QN_C: QName = QN_EXPORT;
+
+ ele_parse! {
+ type Object = Foo;
+
+ Sut := (A | B | C);
+
+ A := QN_A {
+ @ {} => Foo::A,
+ }
+
+ B := QN_B {
+ @ {} => Foo::B,
+ }
+
+ C := QN_C {
+ @ {} => Foo::C,
+ }
+ }
+
+ use Parsed::*;
+ use XirfToken::{Close, Open};
+
+ // Try each in turn with a fresh instance of `Sut`.
+ [(QN_A, Foo::A), (QN_B, Foo::B), (QN_C, Foo::C)]
+ .into_iter()
+ .for_each(|(qname, obj)| {
+ let toks = vec![
+ Open(qname, OpenSpan(S1, N), Depth(0)),
+ Close(None, CloseSpan::empty(S2), Depth(0)),
+ ];
+
+ assert_eq!(
+ Ok(vec![
+ Incomplete, // [X] Open
+ Object(obj), // [X@] Close (>LA)
+ Incomplete, // [X] Close
+ ]),
+ Sut::parse(toks.into_iter()).collect(),
+ );
+ });
+}
+
+#[test]
+fn sum_nonterminal_error_recovery() {
+ #[derive(Debug, PartialEq, Eq)]
+ enum Foo {
+ A,
+ B,
+ }
+
+ impl crate::parse::Object for Foo {}
+
+ // QNames don't matter as long as they are unique.
+ const QN_A: QName = QN_PACKAGE;
+ const QN_B: QName = QN_CLASSIFY;
+ let unexpected: QName = "unexpected".unwrap_into();
+
+ ele_parse! {
+ type Object = Foo;
+
+ Sut := (A | B);
+
+ A := QN_A {
+ @ {} => Foo::A,
+ }
+
+ B := QN_B {
+ @ {} => Foo::B,
+ }
+ }
+
+ // Something >0 just to assert that we're actually paying attention to
+ // it when consuming tokens during recovery.
+ let depth = Depth(5);
+ let depth_child = Depth(6);
+
+ let toks = vec![
+ // Neither A nor B,
+ // which will produce an error and enter recovery.
+ XirfToken::Open(unexpected, OpenSpan(S1, N), depth),
+ // A child element to be ignored,
+ // to ensure that its closing tag will not halt recovery
+ // prematurely.
+ // This further tests that it's too late to provide a valid opening
+ // token
+ // (which is good because we're not at the right depth).
+ XirfToken::Open(QN_A, OpenSpan(S2, N), depth_child),
+ XirfToken::Close(None, CloseSpan::empty(S3), depth_child),
+ // Closing token for the bad element at the corresponding depth,
+ // which will end recovery.
+ XirfToken::Close(Some(unexpected), CloseSpan(S4, N), depth),
+ ];
+
+ let mut sut = Sut::parse(toks.into_iter());
+
+ // The first token of input is the unexpected element,
+ // and so should result an error.
+ // The referenced span should be the _name_ of the element,
+ // not the tag,
+ // since the error is referring not to the fact that an element
+ // was encountered
+ // (which was expected),
+ // but to the fact that the name was not the one expected.
+ assert_eq!(
+ sut.next(),
+ // TODO: This references generated identifiers.
+ Some(Err(ParseError::StateError(SutError_::UnexpectedEle_(
+ unexpected,
+ OpenSpan(S1, N).name_span(),
+ )))),
+ );
+
+ // We should have now entered a recovery mode whereby we discard
+ // input until we close the element that introduced the error.
+ assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete))); // Open child
+ assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete))); // Close child
+
+ // The recovery state must not be in an accepting state,
+ // because we didn't close at the root depth yet.
+ let (mut sut, _) =
+ sut.finalize().expect_err("recovery must not be accepting");
+
+ // The next token should close the element that is in error,
+ // and bring us into an accepting state.
+ // But since we are not emitting tokens,
+ // we'll still be marked as incomplete.
+ assert_eq!(Some(Ok(Parsed::Incomplete)), sut.next()); // Close root
+ sut.finalize()
+ .expect("recovery must complete in an accepting state");
+}