diff options
author | Mike Gerwitz <mike.gerwitz@ryansg.com> | 2022-03-18 15:26:05 -0400 |
---|---|---|
committer | Mike Gerwitz <mike.gerwitz@ryansg.com> | 2022-03-18 15:26:05 -0400 |
commit | 0360226caacaafb0d080dfe7da2e1a683d537c59 (patch) | |
tree | 7941a9484dac35fc7d4c9e4d6ba823c0918bc310 /tamer | |
parent | 150b3b9aa4312a321c0bae0c1dabc32ab38719f2 (diff) | |
download | tame-0360226caacaafb0d080dfe7da2e1a683d537c59.tar.gz tame-0360226caacaafb0d080dfe7da2e1a683d537c59.tar.bz2 tame-0360226caacaafb0d080dfe7da2e1a683d537c59.zip |
tamer: xir::parse: Generalize input token type
This adds an associated `Token` type to the `ParseState` trait. Every existing
parser still uses `xir::Token`, but `XmloReader` will use `xir::flat::Object`.
Now that this has been generalized beyond XIR, the parser ought to be
hoisted up a level.
DEV-10863
Diffstat (limited to 'tamer')
-rw-r--r-- | tamer/src/obj/xmlo/error.rs | 8 | ||||
-rw-r--r-- | tamer/src/xir.rs | 10 | ||||
-rw-r--r-- | tamer/src/xir/attr/parse.rs | 28 | ||||
-rw-r--r-- | tamer/src/xir/flat.rs | 5 | ||||
-rw-r--r-- | tamer/src/xir/parse.rs | 113 | ||||
-rw-r--r-- | tamer/src/xir/tree.rs | 19 |
6 files changed, 109 insertions, 74 deletions
diff --git a/tamer/src/obj/xmlo/error.rs b/tamer/src/obj/xmlo/error.rs index d2655ad..d8b1b57 100644 --- a/tamer/src/obj/xmlo/error.rs +++ b/tamer/src/obj/xmlo/error.rs @@ -21,7 +21,7 @@ use crate::sym::SymbolId; use crate::tpwrap::quick_xml::{Error as XmlError, InnerXmlError}; -use crate::xir::{parse::ParseError, tree::StackError}; +use crate::xir::{parse::ParseError, tree::StackError, Token}; use std::fmt::Display; /// Error during `xmlo` processing. @@ -38,7 +38,7 @@ pub enum XmloError { /// XML parsing error (legacy, quick-xml). XmlError(XmlError), /// XIR parsing error. - XirtError(ParseError<StackError>), + XirtError(ParseError<Token, StackError>), /// The root node was not an `lv:package`. UnexpectedRoot, /// A `preproc:sym` node was found, but is missing `@name`. @@ -70,8 +70,8 @@ impl From<InnerXmlError> for XmloError { } } -impl From<ParseError<StackError>> for XmloError { - fn from(e: ParseError<StackError>) -> Self { +impl From<ParseError<Token, StackError>> for XmloError { + fn from(e: ParseError<Token, StackError>) -> Self { Self::XirtError(e) } } diff --git a/tamer/src/xir.rs b/tamer/src/xir.rs index ce462c0..b9663cc 100644 --- a/tamer/src/xir.rs +++ b/tamer/src/xir.rs @@ -567,11 +567,11 @@ impl Display for Token { } } -impl Token { +impl parse::Token for Token { /// Retrieve the [`Span`] associated with a given [`Token`]. /// /// Every token has an associated span. 
- pub fn span(&self) -> Span { + fn span(&self) -> Span { use Token::*; match self { @@ -588,12 +588,6 @@ impl Token { } } -impl From<Token> for Span { - fn from(tok: Token) -> Self { - tok.span() - } -} - #[cfg(test)] mod test { use super::*; diff --git a/tamer/src/xir/attr/parse.rs b/tamer/src/xir/attr/parse.rs index aa57816..dafa966 100644 --- a/tamer/src/xir/attr/parse.rs +++ b/tamer/src/xir/attr/parse.rs @@ -23,7 +23,7 @@ use crate::{ span::Span, xir::{ parse::{ParseState, Transition, TransitionResult}, - QName, Token, + QName, Token as XirToken, }, }; use std::{error::Error, fmt::Display}; @@ -45,20 +45,21 @@ pub enum AttrParseState { } impl ParseState for AttrParseState { + type Token = XirToken; type Object = Attr; type Error = AttrParseError; - fn parse_token(self, tok: Token) -> TransitionResult<Self> { + fn parse_token(self, tok: Self::Token) -> TransitionResult<Self> { use AttrParseState::{Empty, Name}; match (self, tok) { - (Empty, Token::AttrName(name, span)) => { + (Empty, XirToken::AttrName(name, span)) => { Transition(Name(name, span)).incomplete() } (Empty, invalid) => Transition(Empty).dead(invalid), - (Name(name, nspan), Token::AttrValue(value, vspan)) => { + (Name(name, nspan), XirToken::AttrValue(value, vspan)) => { Transition(Empty).with(Attr::new(name, value, (nspan, vspan))) } @@ -87,10 +88,10 @@ impl Default for AttrParseState { #[derive(Debug, PartialEq, Eq)] pub enum AttrParseError { /// [`Token::AttrName`] was expected. - AttrNameExpected(Token), + AttrNameExpected(XirToken), /// [`Token::AttrValue`] was expected. 
- AttrValueExpected(QName, Span, Token), + AttrValueExpected(QName, Span, XirToken), } impl Display for AttrParseError { @@ -131,7 +132,7 @@ mod test { #[test] fn dead_if_first_token_is_non_attr() { - let tok = Token::Open("foo".unwrap_into(), S); + let tok = XirToken::Open("foo".unwrap_into(), S); let sut = AttrParseState::default(); @@ -153,8 +154,8 @@ mod test { let attr = "attr".unwrap_into(); let val = "val".intern(); - let toks = - [Token::AttrName(attr, S), Token::AttrValue(val, S2)].into_iter(); + let toks = [XirToken::AttrName(attr, S), XirToken::AttrValue(val, S2)] + .into_iter(); let sut = AttrParseState::parse(toks); @@ -176,17 +177,18 @@ mod test { // This token indicates that we're expecting a value to come next in // the token stream. let (Transition(sut), result) = - sut.parse_token(Token::AttrName(attr, S)); + sut.parse_token(XirToken::AttrName(attr, S)); assert_eq!(result, Ok(ParseStatus::Incomplete)); // But we provide something else unexpected. - let (Transition(sut), result) = sut.parse_token(Token::Close(None, S2)); + let (Transition(sut), result) = + sut.parse_token(XirToken::Close(None, S2)); assert_eq!( result, Err(AttrParseError::AttrValueExpected( attr, S, - Token::Close(None, S2) + XirToken::Close(None, S2) )) ); @@ -201,7 +203,7 @@ mod test { // let's actually attempt a recovery. 
let recover = "value".intern(); let (Transition(sut), result) = - sut.parse_token(Token::AttrValue(recover, S2)); + sut.parse_token(XirToken::AttrValue(recover, S2)); assert_eq!( result, Ok(ParseStatus::Object(Attr::new(attr, recover, (S, S2)))), diff --git a/tamer/src/xir/flat.rs b/tamer/src/xir/flat.rs index 37e411d..b61c166 100644 --- a/tamer/src/xir/flat.rs +++ b/tamer/src/xir/flat.rs @@ -41,7 +41,7 @@ use super::{ attr::{Attr, AttrParseError, AttrParseState}, parse::{ParseState, ParseStatus, ParsedResult, TransitionResult}, - QName, Token, TokenStream, Whitespace, + QName, Token, Token as XirToken, TokenStream, Whitespace, }; use crate::{span::Span, sym::SymbolId, xir::parse::Transition}; use arrayvec::ArrayVec; @@ -109,7 +109,7 @@ pub enum Object { } /// XIRF-compatible attribute parser. -pub trait FlatAttrParseState = ParseState<Object = Attr> +pub trait FlatAttrParseState = ParseState<Token = XirToken, Object = Attr> where <Self as ParseState>::Error: Into<StateError>; @@ -146,6 +146,7 @@ impl<const MAX_DEPTH: usize, SA> ParseState for State<MAX_DEPTH, SA> where SA: FlatAttrParseState, { + type Token = XirToken; type Object = Object; type Error = StateError; diff --git a/tamer/src/xir/parse.rs b/tamer/src/xir/parse.rs index 6dc8dde..611ba43 100644 --- a/tamer/src/xir/parse.rs +++ b/tamer/src/xir/parse.rs @@ -19,7 +19,6 @@ //! Basic streaming parsing framework for XIR lowering operations. -use super::{Token, TokenStream}; use crate::span::Span; use std::fmt::Debug; use std::mem::take; @@ -31,7 +30,34 @@ pub type ParsedResult<S> = ParseResult<S, Parsed<<S as ParseState>::Object>>; /// Result of some non-parsing operation on a [`Parser`], /// with any error having been wrapped in a [`ParseError`]. -pub type ParseResult<S, T> = Result<T, ParseError<<S as ParseState>::Error>>; +pub type ParseResult<S, T> = + Result<T, ParseError<<S as ParseState>::Token, <S as ParseState>::Error>>; + +/// A single datum from a streaming IR with an associated [`Span`]. 
+/// +/// A token may be a lexeme with associated data, +/// or a more structured object having been lowered from other IRs. +pub trait Token: Display + Debug + PartialEq + Eq { + /// Retrieve the [`Span`] representing the source location of the token. + fn span(&self) -> Span; +} + +impl<T: Token> From<T> for Span { + fn from(tok: T) -> Self { + tok.span() + } +} + +/// An infallible [`Token`] stream. +/// +/// If the token stream originates from an operation that could potentially +/// fail and ought to be propagated, +/// use [`TokenResultStream`]. +/// +/// The name "stream" in place of "iterator" is intended to convey that this +/// type is expected to be processed in real-time as a stream, +/// not read into memory. +pub trait TokenStream<T: Token> = Iterator<Item = T>; /// A deterministic parsing automaton. /// @@ -54,6 +80,9 @@ pub type ParseResult<S, T> = Result<T, ParseError<<S as ParseState>::Error>>; /// [`TokenStream`] at the current position for a given parser /// composition. pub trait ParseState: Default + PartialEq + Eq + Debug { + /// Input tokens to the parser. + type Token: Token; + /// Objects produced by a parser utilizing these states. type Object; @@ -64,7 +93,7 @@ pub trait ParseState: Default + PartialEq + Eq + Debug { /// /// Whether this method is helpful or provides any clarity depends on /// the context and the types that are able to be inferred. - fn parse<I: TokenStream>(toks: I) -> Parser<Self, I> { + fn parse<I: TokenStream<Self::Token>>(toks: I) -> Parser<Self, I> { Parser::from(toks) } @@ -85,7 +114,7 @@ pub trait ParseState: Default + PartialEq + Eq + Debug { /// which in turn makes it easier to compose parsers /// (which conceptually involves stitching together state /// machines). - fn parse_token(self, tok: Token) -> TransitionResult<Self>; + fn parse_token(self, tok: Self::Token) -> TransitionResult<Self>; /// Whether the current state represents an accepting state. 
/// @@ -109,8 +138,10 @@ pub trait ParseState: Default + PartialEq + Eq + Debug { /// /// This is used by [`ParseState::parse_token`]; /// see that function for rationale. -pub type ParseStateResult<S> = - Result<ParseStatus<<S as ParseState>::Object>, <S as ParseState>::Error>; +pub type ParseStateResult<S> = Result< + ParseStatus<<S as ParseState>::Token, <S as ParseState>::Object>, + <S as ParseState>::Error, +>; /// Denotes a state transition. /// @@ -145,7 +176,7 @@ impl<S: ParseState> Transition<S> { /// This corresponds to [`ParseStatus::Dead`], /// and a calling parser should use the provided [`Token`] as /// lookahead. - pub fn dead(self, tok: Token) -> (Self, ParseStateResult<S>) { + pub fn dead(self, tok: S::Token) -> (Self, ParseStateResult<S>) { (self, Ok(ParseStatus::Dead(tok))) } @@ -180,13 +211,13 @@ pub type TransitionResult<S> = (Transition<S>, ParseStateResult<S>); /// call [`finalize`](Parser::finalize) to ensure that parsing has /// completed in an accepting state. #[derive(Debug, PartialEq, Eq)] -pub struct Parser<S: ParseState, I: TokenStream> { +pub struct Parser<S: ParseState, I: TokenStream<S::Token>> { toks: I, state: S, last_span: Option<Span>, } -impl<S: ParseState, I: TokenStream> Parser<S, I> { +impl<S: ParseState, I: TokenStream<S::Token>> Parser<S, I> { /// Indicate that no further parsing will take place using this parser, /// and [`drop`] it. /// @@ -197,7 +228,9 @@ impl<S: ParseState, I: TokenStream> Parser<S, I> { /// Consequently, /// the caller should expect [`ParseError::UnexpectedEof`] if the /// parser is not in an accepting state. 
- pub fn finalize(self) -> Result<(), (Self, ParseError<S::Error>)> { + pub fn finalize( + self, + ) -> Result<(), (Self, ParseError<S::Token, S::Error>)> { if self.state.is_accepting() { Ok(()) } else { @@ -207,7 +240,7 @@ impl<S: ParseState, I: TokenStream> Parser<S, I> { } } -impl<S: ParseState, I: TokenStream> Iterator for Parser<S, I> { +impl<S: ParseState, I: TokenStream<S::Token>> Iterator for Parser<S, I> { type Item = ParsedResult<S>; /// Parse a single [`Token`] according to the current @@ -276,7 +309,7 @@ impl<S: ParseState, I: TokenStream> Iterator for Parser<S, I> { /// Parsers may return their own unique errors via the /// [`StateError`][ParseError::StateError] variant. #[derive(Debug, PartialEq, Eq)] -pub enum ParseError<E: Error + PartialEq + Eq> { +pub enum ParseError<T: Token, E: Error + PartialEq + Eq> { /// Token stream ended unexpectedly. /// /// This error means that the parser was expecting more input before @@ -304,15 +337,15 @@ pub enum ParseError<E: Error + PartialEq + Eq> { /// it may be desirable to be able to query [`ParseState`] for what /// tokens are acceptable at this point, /// to provide better error messages. - UnexpectedToken(Token), + UnexpectedToken(T), /// A parser-specific error associated with an inner /// [`ParseState`]. 
StateError(E), } -impl<EA: Error + PartialEq + Eq> ParseError<EA> { - pub fn inner_into<EB: Error + PartialEq + Eq>(self) -> ParseError<EB> +impl<T: Token, EA: Error + PartialEq + Eq> ParseError<T, EA> { + pub fn inner_into<EB: Error + PartialEq + Eq>(self) -> ParseError<T, EB> where EA: Into<EB>, { @@ -325,13 +358,13 @@ impl<EA: Error + PartialEq + Eq> ParseError<EA> { } } -impl<E: Error + PartialEq + Eq> From<E> for ParseError<E> { +impl<T: Token, E: Error + PartialEq + Eq> From<E> for ParseError<T, E> { fn from(e: E) -> Self { Self::StateError(e) } } -impl<E: Error + PartialEq + Eq> Display for ParseError<E> { +impl<T: Token, E: Error + PartialEq + Eq> Display for ParseError<T, E> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::UnexpectedEof(ospan) => { @@ -350,7 +383,7 @@ impl<E: Error + PartialEq + Eq> Display for ParseError<E> { } } -impl<E: Error + PartialEq + Eq + 'static> Error for ParseError<E> { +impl<T: Token, E: Error + PartialEq + Eq + 'static> Error for ParseError<T, E> { fn source(&self) -> Option<&(dyn Error + 'static)> { match self { Self::StateError(e) => Some(e), @@ -359,7 +392,7 @@ impl<E: Error + PartialEq + Eq + 'static> Error for ParseError<E> { } } -impl<S: ParseState, I: TokenStream> From<I> for Parser<S, I> { +impl<S: ParseState, I: TokenStream<S::Token>> From<I> for Parser<S, I> { fn from(toks: I) -> Self { Self { toks, @@ -371,7 +404,7 @@ impl<S: ParseState, I: TokenStream> From<I> for Parser<S, I> { /// Result of a parsing operation. #[derive(Debug, PartialEq, Eq)] -pub enum ParseStatus<T> { +pub enum ParseStatus<T, O> { /// Additional tokens are needed to complete parsing of the next object. Incomplete, @@ -379,7 +412,7 @@ pub enum ParseStatus<T> { /// /// This does not indicate that the parser is complete, /// as more objects may be able to be emitted. - Object(T), + Object(O), /// Parser encountered a dead state relative to the given token. 
/// @@ -404,7 +437,7 @@ pub enum ParseStatus<T> { /// /// If there is no parent context to handle the token, /// [`Parser`] must yield an error. - Dead(Token), + Dead(T), } /// Result of a parsing operation. @@ -413,7 +446,7 @@ pub enum ParseStatus<T> { /// operation, /// this type is public-facing and used by [`Parser`]. #[derive(Debug, PartialEq, Eq)] -pub enum Parsed<T> { +pub enum Parsed<O> { /// Additional tokens are needed to complete parsing of the next object. Incomplete, @@ -421,11 +454,11 @@ pub enum Parsed<T> { /// /// This does not indicate that the parser is complete, /// as more objects may be able to be emitted. - Object(T), + Object(O), } -impl<T> From<ParseStatus<T>> for Parsed<T> { - fn from(status: ParseStatus<T>) -> Self { +impl<T: Token, O> From<ParseStatus<T, O>> for Parsed<O> { + fn from(status: ParseStatus<T, O>) -> Self { match status { ParseStatus::Incomplete => Parsed::Incomplete, ParseStatus::Object(x) => Parsed::Object(x), @@ -440,6 +473,7 @@ impl<T> From<ParseStatus<T>> for Parsed<T> { pub mod test { use std::{assert_matches::assert_matches, iter::once}; + use super::super::Token as XirToken; use super::*; use crate::{span::DUMMY_SPAN as DS, sym::GlobalSymbolIntern}; @@ -456,16 +490,17 @@ pub mod test { } impl ParseState for EchoState { - type Object = Token; + type Token = XirToken; + type Object = XirToken; type Error = EchoStateError; - fn parse_token(self, tok: Token) -> TransitionResult<Self> { + fn parse_token(self, tok: XirToken) -> TransitionResult<Self> { match tok { - Token::Comment(..) => Transition(Self::Done).with(tok), - Token::Close(..) => { + XirToken::Comment(..) => Transition(Self::Done).with(tok), + XirToken::Close(..) => { Transition(self).err(EchoStateError::InnerError(tok)) } - Token::Text(..) => Transition(self).dead(tok), + XirToken::Text(..) 
=> Transition(self).dead(tok), _ => Transition(self).with(tok), } } @@ -477,7 +512,7 @@ pub mod test { #[derive(Debug, PartialEq, Eq)] enum EchoStateError { - InnerError(Token), + InnerError(XirToken), } impl Display for EchoStateError { @@ -497,7 +532,7 @@ pub mod test { #[test] fn successful_parse_in_accepting_state_with_spans() { // EchoState is placed into a Done state given Comment. - let tok = Token::Comment("foo".into(), DS); + let tok = XirToken::Comment("foo".into(), DS); let mut toks = once(tok.clone()); let mut sut = Sut::from(&mut toks); @@ -518,7 +553,7 @@ pub mod test { #[test] fn fails_on_end_of_stream_when_not_in_accepting_state() { let span = Span::new(10, 20, "ctx".intern()); - let mut toks = [Token::Close(None, span)].into_iter(); + let mut toks = [XirToken::Close(None, span)].into_iter(); let mut sut = Sut::from(&mut toks); @@ -538,8 +573,8 @@ pub mod test { #[test] fn returns_state_specific_error() { - // Token::Close causes EchoState to produce an error. - let errtok = Token::Close(None, DS); + // XirToken::Close causes EchoState to produce an error. + let errtok = XirToken::Close(None, DS); let mut toks = [errtok.clone()].into_iter(); let mut sut = Sut::from(&mut toks); @@ -564,10 +599,10 @@ pub mod test { // Set up so that we have a single token that we can use for // recovery as part of the same iterator. - let recovery = Token::Comment("recov".into(), DS); + let recovery = XirToken::Comment("recov".into(), DS); let mut toks = [ // Used purely to populate a Span. - Token::Close(None, span), + XirToken::Close(None, span), // Recovery token here: recovery.clone(), ] @@ -605,7 +640,7 @@ pub mod test { #[test] fn unhandled_dead_state_results_in_error() { // A Text will cause our parser to return Dead. 
- let tok = Token::Text("dead".into(), DS); + let tok = XirToken::Text("dead".into(), DS); let mut toks = once(tok.clone()); let mut sut = Sut::from(&mut toks); diff --git a/tamer/src/xir/tree.rs b/tamer/src/xir/tree.rs index 9a5846b..0933edb 100644 --- a/tamer/src/xir/tree.rs +++ b/tamer/src/xir/tree.rs @@ -173,14 +173,15 @@ //! //! [state machine]: https://en.wikipedia.org/wiki/Finite-state_machine -use self::super::{ +use super::{ attr::{Attr, AttrList, AttrParseError, AttrParseState}, - parse::{ParseError, ParseResult, ParseState, ParseStatus, ParsedResult}, + parse::{ + ParseError, ParseResult, ParseState, ParseStatus, ParsedResult, + TransitionResult, + }, + QName, Token, Token as XirToken, TokenResultStream, TokenStream, }; -use super::{ - parse::TransitionResult, QName, Token, TokenResultStream, TokenStream, -}; use crate::{span::Span, sym::SymbolId, xir::parse::Transition}; use std::{error::Error, fmt::Display, result}; @@ -496,7 +497,7 @@ where Done, } -pub trait StackAttrParseState = ParseState<Object = Attr> +pub trait StackAttrParseState = ParseState<Token = XirToken, Object = Attr> where <Self as ParseState>::Error: Into<StackError>; @@ -507,10 +508,11 @@ impl<SA: StackAttrParseState> Default for Stack<SA> { } impl<SA: StackAttrParseState> ParseState for Stack<SA> { + type Token = XirToken; type Object = Tree; type Error = StackError; - fn parse_token(self, tok: Token) -> TransitionResult<Self> { + fn parse_token(self, tok: Self::Token) -> TransitionResult<Self> { use Stack::*; match (self, tok) { @@ -768,7 +770,8 @@ pub fn parser_from( #[inline] pub fn attr_parser_from<'a>( toks: impl TokenStream, -) -> impl Iterator<Item = result::Result<Attr, ParseError<StackError>>> { +) -> impl Iterator<Item = result::Result<Attr, ParseError<XirToken, StackError>>> +{ use super::parse::Parsed; AttrParseState::parse(toks).filter_map(|parsed| match parsed { |