Refactor parsing in bevy_reflect path module (#9048)

# Objective

- Follow up to #8887
- The parsing code in `bevy_reflect/src/path/mod.rs` could also do with some cleanup

## Solution

- Create the `parse.rs` module and move all parsing code into it
- The parsing errors now also keep track of the whole parsed string, and are much more fine-grained

### Detailed changes

- Move `PathParser` to the `parse.rs` submodule
- Rename `token_to_access` to `access_following` (yep, it goes from 132 lines to 16)
- Move the parsing tests into the `parse.rs` file
2024-11-26 06:30:19 +00:00 · 2023-08-05 19:18:13 +02:00 · 2023-08-05 19:18:13 +02:00 · 10797d4f15
commit 10797d4f15
parent e52af83045
2 changed files with 194 additions and 182 deletions
--- a/crates/bevy_reflect/src/path/mod.rs
+++ b/crates/bevy_reflect/src/path/mod.rs
@@ -1,49 +1,40 @@
 mod access;
+mod parse;

 use std::fmt;
-use std::num::ParseIntError;

 use crate::Reflect;
 use access::Access;
+use parse::PathParser;
 use thiserror::Error;

-type ParseResult<T> = Result<T, ReflectPathParseError>;
+pub use parse::ParseError;

 /// An error specific to accessing a field/index on a `Reflect`.
 #[derive(Debug, PartialEq, Eq, Error)]
 #[error(transparent)]
 pub struct AccessError<'a>(access::Error<'a>);

-/// A parse error for a path string.
-#[derive(Debug, PartialEq, Eq, Error)]
-pub enum ReflectPathParseError {
-    #[error("expected an identifier at offset {offset}")]
-    ExpectedIdent { offset: usize },
-
-    #[error("encountered an unexpected token `{token}`")]
-    UnexpectedToken { offset: usize, token: &'static str },
-
-    #[error("expected token `{token}`, but it wasn't there.")]
-    ExpectedToken { offset: usize, token: &'static str },
-
-    #[error("failed to parse a usize")]
-    IndexParseError(#[from] ParseIntError),
-}
-
 /// An error returned from a failed path string query.
 #[derive(Debug, PartialEq, Eq, Error)]
 pub enum ReflectPathError<'a> {
-    #[error("{error}")]
+    #[error("at {offset} in path specification: {error}")]
    InvalidAccess {
        /// Position in the path string.
        offset: usize,
        error: AccessError<'a>,
    },
+
    #[error("failed to downcast to the path result to the given type")]
    InvalidDowncast,

-    #[error(transparent)]
-    Parse(#[from] ReflectPathParseError),
+    #[error("at {offset} in '{path}': {error}")]
+    ParseError {
+        /// Position in `path`.
+        offset: usize,
+        path: &'a str,
+        error: ParseError<'a>,
+    },
 }

 /// A trait which allows nested [`Reflect`] values to be retrieved with path strings.
@@ -417,139 +408,6 @@ impl fmt::Display for ParsedPath {
        Ok(())
    }
 }
-
-struct PathParser<'a> {
-    path: &'a str,
-    index: usize,
-}
-
-impl<'a> PathParser<'a> {
-    fn new(path: &'a str) -> Self {
-        Self { path, index: 0 }
-    }
-
-    fn next_token(&mut self) -> Option<Token<'a>> {
-        if self.index >= self.path.len() {
-            return None;
-        }
-
-        match self.path[self.index..].chars().next().unwrap() {
-            Token::DOT => {
-                self.index += 1;
-                return Some(Token::Dot);
-            }
-            Token::CROSSHATCH => {
-                self.index += 1;
-                return Some(Token::CrossHatch);
-            }
-            Token::OPEN_BRACKET => {
-                self.index += 1;
-                return Some(Token::OpenBracket);
-            }
-            Token::CLOSE_BRACKET => {
-                self.index += 1;
-                return Some(Token::CloseBracket);
-            }
-            _ => {}
-        }
-
-        // we can assume we are parsing an ident now
-        for (char_index, character) in self.path[self.index..].chars().enumerate() {
-            match character {
-                Token::DOT | Token::CROSSHATCH | Token::OPEN_BRACKET | Token::CLOSE_BRACKET => {
-                    let ident = Token::Ident(&self.path[self.index..self.index + char_index]);
-                    self.index += char_index;
-                    return Some(ident);
-                }
-                _ => {}
-            }
-        }
-        let ident = Token::Ident(&self.path[self.index..]);
-        self.index = self.path.len();
-        Some(ident)
-    }
-
-    fn token_to_access(&mut self, token: Token<'a>) -> ParseResult<Access<'a>> {
-        let current_offset = self.index;
-        match token {
-            Token::Dot => {
-                if let Some(Token::Ident(value)) = self.next_token() {
-                    value
-                        .parse::<usize>()
-                        .map(Access::TupleIndex)
-                        .or(Ok(Access::Field(value.into())))
-                } else {
-                    Err(ReflectPathParseError::ExpectedIdent {
-                        offset: current_offset,
-                    })
-                }
-            }
-            Token::CrossHatch => {
-                if let Some(Token::Ident(value)) = self.next_token() {
-                    Ok(Access::FieldIndex(value.parse::<usize>()?))
-                } else {
-                    Err(ReflectPathParseError::ExpectedIdent {
-                        offset: current_offset,
-                    })
-                }
-            }
-            Token::OpenBracket => {
-                let access = if let Some(Token::Ident(value)) = self.next_token() {
-                    Access::ListIndex(value.parse::<usize>()?)
-                } else {
-                    return Err(ReflectPathParseError::ExpectedIdent {
-                        offset: current_offset,
-                    });
-                };
-
-                if !matches!(self.next_token(), Some(Token::CloseBracket)) {
-                    return Err(ReflectPathParseError::ExpectedToken {
-                        offset: current_offset,
-                        token: Token::OPEN_BRACKET_STR,
-                    });
-                }
-
-                Ok(access)
-            }
-            Token::CloseBracket => Err(ReflectPathParseError::UnexpectedToken {
-                offset: current_offset,
-                token: Token::CLOSE_BRACKET_STR,
-            }),
-            Token::Ident(value) => value
-                .parse::<usize>()
-                .map(Access::TupleIndex)
-                .or(Ok(Access::Field(value.into()))),
-        }
-    }
-}
-
-impl<'a> Iterator for PathParser<'a> {
-    type Item = (ParseResult<Access<'a>>, usize);
-
-    fn next(&mut self) -> Option<Self::Item> {
-        let token = self.next_token()?;
-        let index = self.index;
-        Some((self.token_to_access(token), index))
-    }
-}
-
-enum Token<'a> {
-    Dot,
-    CrossHatch,
-    OpenBracket,
-    CloseBracket,
-    Ident(&'a str),
-}
-
-impl<'a> Token<'a> {
-    const DOT: char = '.';
-    const CROSSHATCH: char = '#';
-    const OPEN_BRACKET: char = '[';
-    const CLOSE_BRACKET: char = ']';
-    const OPEN_BRACKET_STR: &'static str = "[";
-    const CLOSE_BRACKET_STR: &'static str = "]";
-}
-
 #[cfg(test)]
 #[allow(clippy::float_cmp, clippy::approx_constant)]
 mod tests {
@@ -616,6 +474,13 @@ mod tests {
        Access::Field(field.into())
    }

+    type StaticError = ReflectPathError<'static>;
+
+    fn invalid_access(offset: usize, actual: TypeShape, expected: TypeShape) -> StaticError {
+        let error = AccessError(access::Error::Type { actual, expected });
+        ReflectPathError::InvalidAccess { offset, error }
+    }
+
    #[test]
    fn parsed_path_parse() {
        assert_eq!(
@@ -791,40 +656,14 @@ mod tests {
                }),
            }
        );
-
-        assert_eq!(
-            a.reflect_path("x..").err().unwrap(),
-            ReflectPathError::Parse(ReflectPathParseError::ExpectedIdent { offset: 2 })
-        );
-
        assert_eq!(
            a.reflect_path("x[0]").err().unwrap(),
-            ReflectPathError::InvalidAccess {
-                offset: 2,
-                error: AccessError(access::Error::Type {
-                    actual: TypeShape::Struct,
-                    expected: TypeShape::List
-                }),
-            }
+            invalid_access(2, TypeShape::Struct, TypeShape::List)
        );
-
        assert_eq!(
            a.reflect_path("y.x").err().unwrap(),
-            ReflectPathError::InvalidAccess {
-                offset: 2,
-                error: AccessError(access::Error::Type {
-                    actual: TypeShape::List,
-                    expected: TypeShape::Struct
-                }),
-            }
+            invalid_access(2, TypeShape::List, TypeShape::Struct)
        );
-
-        assert!(matches!(
-            a.reflect_path("y[badindex]"),
-            Err(ReflectPathError::Parse(
-                ReflectPathParseError::IndexParseError(_)
-            ))
-        ));
    }

    #[test]
--- a/crates/bevy_reflect/src/path/parse.rs
+++ b/crates/bevy_reflect/src/path/parse.rs
@@ -0,0 +1,173 @@
+use std::{fmt, num::ParseIntError};
+
+use thiserror::Error;
+
+use super::{Access, ReflectPathError};
+
+/// An error that occurs when parsing reflect path strings.
+#[derive(Debug, PartialEq, Eq, Error)]
+#[error(transparent)]
+pub struct ParseError<'a>(Error<'a>);
+
+/// A parse error for a path string.
+#[derive(Debug, PartialEq, Eq, Error)]
+enum Error<'a> {
+    #[error("expected an identifier, but reached end of path string")]
+    NoIdent,
+
+    #[error("expected an identifier, got '{0}' instead")]
+    ExpectedIdent(Token<'a>),
+
+    #[error("failed to parse index as integer")]
+    InvalidIndex(#[from] ParseIntError),
+
+    #[error("a '[' wasn't closed, reached end of path string before finding a ']'")]
+    Unclosed,
+
+    #[error("a '[' wasn't closed properly, got '{0}' instead")]
+    BadClose(Token<'a>),
+
+    #[error("a ']' was found before an opening '['")]
+    CloseBeforeOpen,
+}
+
+pub(super) struct PathParser<'a> {
+    path: &'a str,
+    offset: usize,
+}
+impl<'a> PathParser<'a> {
+    pub(super) fn new(path: &'a str) -> Self {
+        PathParser { path, offset: 0 }
+    }
+
+    fn next_token(&mut self) -> Option<Token<'a>> {
+        let input = &self.path[self.offset..];
+
+        // Return with `None` if empty.
+        let first_char = input.chars().next()?;
+
+        if let Some(token) = Token::symbol_from_char(first_char) {
+            self.offset += 1; // NOTE: we assume all symbols are ASCII
+            return Some(token);
+        }
+        // We are parsing either `0123` or `field`.
+        // If we do not find a subsequent token, we are at the end of the parse string.
+        let ident = input.split_once(Token::SYMBOLS).map_or(input, |t| t.0);
+
+        self.offset += ident.len();
+        Some(Token::Ident(Ident(ident)))
+    }
+
+    fn next_ident(&mut self) -> Result<Ident<'a>, Error<'a>> {
+        match self.next_token() {
+            Some(Token::Ident(ident)) => Ok(ident),
+            Some(other) => Err(Error::ExpectedIdent(other)),
+            None => Err(Error::NoIdent),
+        }
+    }
+
+    fn access_following(&mut self, token: Token<'a>) -> Result<Access<'a>, Error<'a>> {
+        match token {
+            Token::Dot => Ok(self.next_ident()?.field()),
+            Token::Pound => self.next_ident()?.field_index(),
+            Token::Ident(ident) => Ok(ident.field()),
+            Token::CloseBracket => Err(Error::CloseBeforeOpen),
+            Token::OpenBracket => {
+                let index_ident = self.next_ident()?.list_index()?;
+                match self.next_token() {
+                    Some(Token::CloseBracket) => Ok(index_ident),
+                    Some(other) => Err(Error::BadClose(other)),
+                    None => Err(Error::Unclosed),
+                }
+            }
+        }
+    }
+}
+impl<'a> Iterator for PathParser<'a> {
+    type Item = (Result<Access<'a>, ReflectPathError<'a>>, usize);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let token = self.next_token()?;
+        let offset = self.offset;
+        let err = |error| ReflectPathError::ParseError {
+            offset,
+            path: self.path,
+            error: ParseError(error),
+        };
+        Some((self.access_following(token).map_err(err), offset))
+    }
+}
+
+#[derive(Debug, PartialEq, Eq)]
+struct Ident<'a>(&'a str);
+
+impl<'a> Ident<'a> {
+    fn field(self) -> Access<'a> {
+        let field = |_| Access::Field(self.0.into());
+        self.0.parse().map(Access::TupleIndex).unwrap_or_else(field)
+    }
+    fn field_index(self) -> Result<Access<'a>, Error<'a>> {
+        Ok(Access::FieldIndex(self.0.parse()?))
+    }
+    fn list_index(self) -> Result<Access<'a>, Error<'a>> {
+        Ok(Access::ListIndex(self.0.parse()?))
+    }
+}
+
+#[derive(Debug, PartialEq, Eq)]
+enum Token<'a> {
+    Dot,
+    Pound,
+    OpenBracket,
+    CloseBracket,
+    Ident(Ident<'a>),
+}
+impl fmt::Display for Token<'_> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Token::Dot => f.write_str("."),
+            Token::Pound => f.write_str("#"),
+            Token::OpenBracket => f.write_str("["),
+            Token::CloseBracket => f.write_str("]"),
+            Token::Ident(ident) => f.write_str(ident.0),
+        }
+    }
+}
+impl<'a> Token<'a> {
+    const SYMBOLS: &[char] = &['.', '#', '[', ']'];
+    fn symbol_from_char(char: char) -> Option<Self> {
+        match char {
+            '.' => Some(Self::Dot),
+            '#' => Some(Self::Pound),
+            '[' => Some(Self::OpenBracket),
+            ']' => Some(Self::CloseBracket),
+            _ => None,
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use crate::path::ParsedPath;
+
+    #[test]
+    fn parse_invalid() {
+        assert_eq!(
+            ParsedPath::parse_static("x.."),
+            Err(ReflectPathError::ParseError {
+                error: ParseError(Error::ExpectedIdent(Token::Dot)),
+                offset: 2,
+                path: "x..",
+            }),
+        );
+        assert!(matches!(
+            ParsedPath::parse_static("y[badindex]"),
+            Err(ReflectPathError::ParseError {
+                error: ParseError(Error::InvalidIndex(_)),
+                offset: 2,
+                path: "y[badindex]",
+            }),
+        ));
+    }
+}