Parser: Safe token queue, Rustify errors

This commit is contained in:
Simon Ask Ulsnes 2024-02-02 09:52:53 +01:00
parent 77716c3b53
commit 04168ca612
9 changed files with 583 additions and 699 deletions

View file

@ -14,9 +14,9 @@ description = "Safer libyaml port, based on unsafe-libyaml"
documentation = "https://docs.rs/libyaml-safer"
edition = "2021"
keywords = ["yaml"]
license = "MIT"
license = "Apache-2.0 OR MIT"
repository = "https://github.com/simonask/libyaml-safer"
rust-version = "1.56"
rust-version = "1.70"
[workspace]
[dev-dependencies]
@ -26,6 +26,9 @@ unsafe-libyaml-test-suite = { path = "tests/data" }
[lib]
doc-scrape-examples = false
[dependencies]
thiserror = "1.0"
[package.metadata.docs.rs]
targets = ["x86_64-unknown-linux-gnu"]
rustdoc-args = ["--generate-link-to-definition"]

View file

@ -12,14 +12,12 @@
)]
use libyaml_safer::{
yaml_event_delete, yaml_event_t, yaml_parser_delete, yaml_parser_initialize, yaml_parser_parse,
yaml_parser_set_input, yaml_parser_t, YamlEventData, YAML_DOUBLE_QUOTED_SCALAR_STYLE,
YAML_FOLDED_SCALAR_STYLE, YAML_LITERAL_SCALAR_STYLE, YAML_PLAIN_SCALAR_STYLE,
YAML_SINGLE_QUOTED_SCALAR_STYLE,
yaml_parser_delete, yaml_parser_initialize, yaml_parser_parse, yaml_parser_set_input,
yaml_parser_t, YamlEventData, YAML_DOUBLE_QUOTED_SCALAR_STYLE, YAML_FOLDED_SCALAR_STYLE,
YAML_LITERAL_SCALAR_STYLE, YAML_PLAIN_SCALAR_STYLE, YAML_SINGLE_QUOTED_SCALAR_STYLE,
};
use std::env;
use std::error::Error;
use std::fmt::Write as _;
use std::fs::File;
use std::io::{self, Read, Write};
use std::mem::MaybeUninit;
@ -38,21 +36,15 @@ pub(crate) unsafe fn unsafe_main(
yaml_parser_set_input(&mut parser, stdin);
let mut event = yaml_event_t::default();
loop {
if yaml_parser_parse(&mut parser, &mut event).is_err() {
let mut error = format!("Parse error: {}", parser.problem.unwrap_or(""));
if parser.problem_mark.line != 0 || parser.problem_mark.column != 0 {
let _ = write!(
error,
"\nLine: {} Column: {}",
(parser.problem_mark.line).wrapping_add(1_u64),
(parser.problem_mark.column).wrapping_add(1_u64),
);
}
let event = match yaml_parser_parse(&mut parser) {
Err(err) => {
let error = format!("Parse error: {}", err);
yaml_parser_delete(&mut parser);
return Err(error.into());
}
Ok(event) => event,
};
let mut is_end = false;
@ -137,7 +129,6 @@ pub(crate) unsafe fn unsafe_main(
}
}
yaml_event_delete(&mut event);
if is_end {
break;
}

View file

@ -30,7 +30,7 @@ pub enum ReaderError {
#[derive(Debug, thiserror::Error)]
pub enum ScannerError {
#[error("{problem}")]
#[error("{}:{}: {} {} ({}:{})", problem_mark.line, problem_mark.column, problem, context, context_mark.line, context_mark.column)]
Problem {
context: &'static str,
context_mark: yaml_mark_t,
@ -40,3 +40,41 @@ pub enum ScannerError {
#[error(transparent)]
Reader(#[from] ReaderError),
}
/// Error type for the parser stage.
///
/// Wraps [`ScannerError`] through a `#[from]` conversion, so scanner failures
/// can be propagated with `?` from functions returning `ParserError`.
///
/// NOTE(review): the `Display` implementations print `mark.line` and
/// `mark.column` exactly as stored, with no `+ 1` adjustment — confirm whether
/// consumers of these messages expect 1-based positions.
#[derive(Debug, thiserror::Error)]
pub enum ParserError {
/// Displayed as `no more tokens`.
#[error("no more tokens")]
UnexpectedEof,
/// A problem at a single position, displayed as `line:column: problem`.
#[error("{}:{}: {}", mark.line, mark.column, problem)]
Problem {
/// Static description of what went wrong.
problem: &'static str,
/// Position the problem is reported at.
mark: yaml_mark_t,
},
/// A problem plus the context it was found in, displayed as
/// `line:column: problem context (context_line:context_column)`.
#[error("{}:{}: {} {} ({}:{})", mark.line, mark.column, problem, context, context_mark.line, context_mark.column)]
ProblemWithContext {
/// Static description of the surrounding context.
context: &'static str,
/// Position associated with the context.
context_mark: yaml_mark_t,
/// Static description of what went wrong.
problem: &'static str,
/// Position the problem is reported at.
mark: yaml_mark_t,
},
/// A scanner error, forwarded unchanged (`transparent` delegates `Display`
/// and `source()` to the wrapped error).
#[error(transparent)]
Scanner(#[from] ScannerError),
}
/// Error type for the composer (document loading) stage.
///
/// Wraps [`ParserError`] through a `#[from]` conversion, so parser failures
/// can be propagated with `?` or `.into()` from functions returning
/// `ComposerError`.
///
/// NOTE(review): the `Display` implementations print `mark.line` and
/// `mark.column` exactly as stored, with no `+ 1` adjustment — confirm whether
/// consumers of these messages expect 1-based positions.
#[derive(Debug, thiserror::Error)]
pub enum ComposerError {
/// A problem at a single position, displayed as `line:column: problem`.
#[error("{}:{}: {}", mark.line, mark.column, problem)]
Problem {
/// Static description of what went wrong.
problem: &'static str,
/// Position the problem is reported at.
mark: yaml_mark_t,
},
/// A problem plus the context it was found in, displayed as
/// `line:column: problem context (context_line:context_column)`.
#[error("{}:{}: {} {} ({}:{})", mark.line, mark.column, problem, context, context_mark.line, context_mark.column)]
ProblemWithContext {
/// Static description of the surrounding context.
context: &'static str,
/// Position associated with the context.
context_mark: yaml_mark_t,
/// Static description of what went wrong.
problem: &'static str,
/// Position the problem is reported at.
mark: yaml_mark_t,
},
/// A parser error, forwarded unchanged (`transparent` delegates `Display`
/// and `source()` to the wrapped error).
#[error(transparent)]
Parser(#[from] ParserError),
}

View file

@ -169,9 +169,7 @@ tie-fighter: '|\-*-/|'
let mut read_in = SANITY_INPUT.as_bytes();
yaml_parser_set_input_string(&mut parser, &mut read_in);
let mut doc = yaml_document_t::default();
if yaml_parser_load(&mut parser, &mut doc).is_err() {
panic!("parser error: {:?} {:?}", parser.error, parser.problem);
}
yaml_parser_load(&mut parser, &mut doc).unwrap();
// let mut doc = doc.assume_init();
// let mut emitter = core::mem::MaybeUninit::uninit();
@ -196,6 +194,22 @@ tie-fighter: '|\-*-/|'
}
}
// YAML input for `test_case`: a flow sequence whose only entry is the pair `foo: bar`.
// NOTE(review): the name presumably refers to the YAML test-suite case QF4Y — confirm.
const TEST_CASE_QF4Y: &str = r#"[
foo: bar
]
"#;
/// Loads `TEST_CASE_QF4Y` into a document and fails (panics) if the loader
/// reports an error.
#[test]
fn test_case() {
    let mut loader = parser_new();
    let mut yaml_bytes = TEST_CASE_QF4Y.as_bytes();
    yaml_parser_set_input_string(&mut loader, &mut yaml_bytes);

    let mut document = yaml_document_t::default();
    // `yaml_parser_load` is declared `unsafe`; only the call itself needs the block.
    let outcome = unsafe { yaml_parser_load(&mut loader, &mut document) };
    outcome.unwrap();
}
// #[test]
// fn integration_s7bg() {
// unsafe {
@ -288,4 +302,12 @@ tie-fighter: '|\-*-/|'
emitter.assume_init()
}
}
/// Builds a parser for the tests by running `yaml_parser_initialize` on an
/// uninitialized slot; panics if initialization reports an error.
fn parser_new<'w>() -> yaml_parser_t<'w> {
    let mut parser_slot = core::mem::MaybeUninit::uninit();
    // NOTE(review): `assume_init` relies on `yaml_parser_initialize` having fully
    // written the slot when it returns `Ok` — confirm against its contract.
    unsafe {
        yaml_parser_initialize(parser_slot.as_mut_ptr()).unwrap();
        parser_slot.assume_init()
    }
}
}

View file

@ -4,8 +4,7 @@ use alloc::{vec, vec::Vec};
use crate::yaml::{YamlEventData, YamlNodeData};
use crate::{
libc, yaml_alias_data_t, yaml_document_delete, yaml_document_t, yaml_event_t, yaml_mark_t,
yaml_node_pair_t, yaml_node_t, yaml_parser_parse, yaml_parser_t, YAML_COMPOSER_ERROR,
YAML_MEMORY_ERROR,
yaml_node_pair_t, yaml_node_t, yaml_parser_parse, yaml_parser_t, ComposerError,
};
use core::mem::MaybeUninit;
@ -26,65 +25,71 @@ use core::mem::MaybeUninit;
pub unsafe fn yaml_parser_load(
parser: &mut yaml_parser_t,
document: &mut yaml_document_t,
) -> Result<(), ()> {
let mut event = yaml_event_t::default();
) -> Result<(), ComposerError> {
*document = yaml_document_t::default();
document.nodes.reserve(16);
if !parser.stream_start_produced {
if let Err(()) = yaml_parser_parse(parser, &mut event) {
match yaml_parser_parse(parser) {
Ok(yaml_event_t {
data: YamlEventData::StreamStart { .. },
..
}) => (),
Ok(_) => panic!("expected stream start"),
Err(err) => {
yaml_parser_delete_aliases(parser);
yaml_document_delete(document);
return Err(());
} else {
if let YamlEventData::StreamStart { .. } = &event.data {
} else {
panic!("expected stream start");
return Err(err.into());
}
}
}
if parser.stream_end_produced {
return Ok(());
}
if let Ok(()) = yaml_parser_parse(parser, &mut event) {
let err: ComposerError;
match yaml_parser_parse(parser) {
Ok(event) => {
if let YamlEventData::StreamEnd = &event.data {
return Ok(());
}
parser.aliases.reserve(16);
if let Ok(()) = yaml_parser_load_document(parser, &mut event, document) {
match yaml_parser_load_document(parser, event, document) {
Ok(()) => {
yaml_parser_delete_aliases(parser);
return Ok(());
}
Err(e) => err = e,
}
}
Err(e) => err = e.into(),
}
yaml_parser_delete_aliases(parser);
yaml_document_delete(document);
Err(())
Err(err)
}
fn yaml_parser_set_composer_error(
parser: &mut yaml_parser_t,
fn yaml_parser_set_composer_error<T>(
problem: &'static str,
problem_mark: yaml_mark_t,
) -> Result<(), ()> {
parser.error = YAML_COMPOSER_ERROR;
parser.problem = Some(problem);
parser.problem_mark = problem_mark;
Err(())
) -> Result<T, ComposerError> {
Err(ComposerError::Problem {
problem,
mark: problem_mark,
})
}
fn yaml_parser_set_composer_error_context(
parser: &mut yaml_parser_t,
fn yaml_parser_set_composer_error_context<T>(
context: &'static str,
context_mark: yaml_mark_t,
problem: &'static str,
problem_mark: yaml_mark_t,
) -> Result<(), ()> {
parser.error = YAML_COMPOSER_ERROR;
parser.context = Some(context);
parser.context_mark = context_mark;
parser.problem = Some(problem);
parser.problem_mark = problem_mark;
Err(())
) -> Result<T, ComposerError> {
Err(ComposerError::ProblemWithContext {
context,
context_mark,
problem,
mark: problem_mark,
})
}
unsafe fn yaml_parser_delete_aliases(parser: &mut yaml_parser_t) {
@ -93,24 +98,24 @@ unsafe fn yaml_parser_delete_aliases(parser: &mut yaml_parser_t) {
unsafe fn yaml_parser_load_document(
parser: &mut yaml_parser_t,
event: &mut yaml_event_t,
event: yaml_event_t,
document: &mut yaml_document_t,
) -> Result<(), ()> {
) -> Result<(), ComposerError> {
let mut ctx = vec![];
if let YamlEventData::DocumentStart {
version_directive,
tag_directives,
implicit,
} = &mut event.data
} = event.data
{
document.version_directive = *version_directive;
document.tag_directives = core::mem::take(tag_directives);
document.start_implicit = *implicit;
document.version_directive = version_directive;
document.tag_directives = tag_directives;
document.start_implicit = implicit;
document.start_mark = event.start_mark;
ctx.reserve(16);
if let Err(()) = yaml_parser_load_nodes(parser, document, &mut ctx) {
if let Err(err) = yaml_parser_load_nodes(parser, document, &mut ctx) {
ctx.clear();
return Err(());
return Err(err);
}
ctx.clear();
Ok(())
@ -123,40 +128,39 @@ unsafe fn yaml_parser_load_nodes(
parser: &mut yaml_parser_t,
document: &mut yaml_document_t,
ctx: &mut Vec<libc::c_int>,
) -> Result<(), ()> {
let mut event = yaml_event_t::default();
) -> Result<(), ComposerError> {
let end_implicit;
let end_mark;
loop {
yaml_parser_parse(parser, &mut event)?;
match &event.data {
let event = yaml_parser_parse(parser)?;
match event.data {
YamlEventData::NoEvent => panic!("empty event"),
YamlEventData::StreamStart { .. } => panic!("unexpected stream start event"),
YamlEventData::StreamEnd => panic!("unexpected stream end event"),
YamlEventData::DocumentStart { .. } => panic!("unexpected document start event"),
YamlEventData::DocumentEnd { implicit } => {
end_implicit = *implicit;
end_implicit = implicit;
end_mark = event.end_mark;
break;
}
YamlEventData::Alias { .. } => {
yaml_parser_load_alias(parser, &mut event, document, ctx)?;
yaml_parser_load_alias(parser, event, document, ctx)?;
}
YamlEventData::Scalar { .. } => {
yaml_parser_load_scalar(parser, &mut event, document, ctx)?;
yaml_parser_load_scalar(parser, event, document, ctx)?;
}
YamlEventData::SequenceStart { .. } => {
yaml_parser_load_sequence(parser, &mut event, document, ctx)?;
yaml_parser_load_sequence(parser, event, document, ctx)?;
}
YamlEventData::SequenceEnd => {
yaml_parser_load_sequence_end(parser, &mut event, document, ctx)?;
yaml_parser_load_sequence_end(parser, event, document, ctx)?;
}
YamlEventData::MappingStart { .. } => {
yaml_parser_load_mapping(parser, &mut event, document, ctx)?;
yaml_parser_load_mapping(parser, event, document, ctx)?;
}
YamlEventData::MappingEnd => {
yaml_parser_load_mapping_end(parser, &mut event, document, ctx)?;
yaml_parser_load_mapping_end(parser, event, document, ctx)?;
}
}
}
@ -170,7 +174,7 @@ unsafe fn yaml_parser_register_anchor(
document: &mut yaml_document_t,
index: libc::c_int,
anchor: Option<String>,
) -> Result<(), ()> {
) -> Result<(), ComposerError> {
let Some(anchor) = anchor else {
return Ok(());
};
@ -182,7 +186,6 @@ unsafe fn yaml_parser_register_anchor(
for alias_data in parser.aliases.iter() {
if alias_data.anchor == data.anchor {
return yaml_parser_set_composer_error_context(
parser,
"found duplicate anchor; first occurrence",
alias_data.mark,
"second occurrence",
@ -195,11 +198,10 @@ unsafe fn yaml_parser_register_anchor(
}
unsafe fn yaml_parser_load_node_add(
parser: &mut yaml_parser_t,
document: &mut yaml_document_t,
ctx: &mut Vec<libc::c_int>,
index: libc::c_int,
) -> Result<(), ()> {
) -> Result<(), ComposerError> {
if ctx.is_empty() {
return Ok(());
}
@ -207,7 +209,6 @@ unsafe fn yaml_parser_load_node_add(
let parent = &mut document.nodes[parent_index as usize - 1];
match parent.data {
YamlNodeData::Sequence { ref mut items, .. } => {
STACK_LIMIT!(parser, items)?;
items.push(index);
}
YamlNodeData::Mapping { ref mut pairs, .. } => {
@ -224,7 +225,6 @@ unsafe fn yaml_parser_load_node_add(
if do_push {
(*pair).key = index;
(*pair).value = 0;
STACK_LIMIT!(parser, pairs)?;
pairs.push(*pair);
}
}
@ -237,10 +237,10 @@ unsafe fn yaml_parser_load_node_add(
unsafe fn yaml_parser_load_alias(
parser: &mut yaml_parser_t,
event: &mut yaml_event_t, // TODO: Take by value
event: yaml_event_t,
document: &mut yaml_document_t,
ctx: &mut Vec<libc::c_int>,
) -> Result<(), ()> {
) -> Result<(), ComposerError> {
let anchor: &str = if let YamlEventData::Alias { anchor } = &event.data {
&*anchor
} else {
@ -249,72 +249,64 @@ unsafe fn yaml_parser_load_alias(
for alias_data in parser.aliases.iter() {
if alias_data.anchor == anchor {
return yaml_parser_load_node_add(parser, document, ctx, alias_data.index);
return yaml_parser_load_node_add(document, ctx, alias_data.index);
}
}
yaml_parser_set_composer_error(parser, "found undefined alias", event.start_mark)
yaml_parser_set_composer_error("found undefined alias", event.start_mark)
}
unsafe fn yaml_parser_load_scalar(
parser: &mut yaml_parser_t,
event: &mut yaml_event_t, // TODO: Take by value
event: yaml_event_t,
document: &mut yaml_document_t,
ctx: &mut Vec<libc::c_int>,
) -> Result<(), ()> {
let (mut tag, value, style, anchor) = if let YamlEventData::Scalar {
tag,
) -> Result<(), ComposerError> {
let YamlEventData::Scalar {
mut tag,
value,
style,
anchor,
..
} = &event.data
{
(tag.clone(), value, *style, anchor.clone())
} else {
} = event.data
else {
unreachable!()
};
let index: libc::c_int;
if let Ok(()) = STACK_LIMIT!(parser, document.nodes) {
if tag.is_none() || tag.as_deref() == Some("!") {
tag = Some(String::from("tag:yaml.org,2002:str"));
}
let node = yaml_node_t {
data: YamlNodeData::Scalar {
value: value.clone(), // TODO: move
style,
},
data: YamlNodeData::Scalar { value, style },
tag,
start_mark: (*event).start_mark,
end_mark: (*event).end_mark,
start_mark: event.start_mark,
end_mark: event.end_mark,
};
document.nodes.push(node);
index = document.nodes.len() as libc::c_int;
yaml_parser_register_anchor(parser, document, index, anchor)?;
return yaml_parser_load_node_add(parser, document, ctx, index);
}
Err(())
yaml_parser_load_node_add(document, ctx, index)
}
unsafe fn yaml_parser_load_sequence(
parser: &mut yaml_parser_t,
event: &mut yaml_event_t, // TODO: Take by value.
event: yaml_event_t,
document: &mut yaml_document_t,
ctx: &mut Vec<libc::c_int>,
) -> Result<(), ()> {
let (mut tag, style, anchor) = if let YamlEventData::SequenceStart {
anchor, tag, style, ..
} = &event.data
{
(tag.clone(), *style, anchor)
} else {
) -> Result<(), ComposerError> {
let YamlEventData::SequenceStart {
anchor,
mut tag,
style,
..
} = event.data
else {
unreachable!()
};
let mut items = Vec::with_capacity(16);
let index: libc::c_int;
STACK_LIMIT!(parser, document.nodes)?;
if tag.is_none() || tag.as_deref() == Some("!") {
tag = Some(String::from("tag:yaml.org,2002:seq"));
}
@ -332,47 +324,46 @@ unsafe fn yaml_parser_load_sequence(
document.nodes.push(node);
index = document.nodes.len() as libc::c_int;
yaml_parser_register_anchor(parser, document, index, anchor.clone())?;
yaml_parser_load_node_add(parser, document, ctx, index)?;
STACK_LIMIT!(parser, *ctx)?;
yaml_parser_load_node_add(document, ctx, index)?;
ctx.push(index);
Ok(())
}
unsafe fn yaml_parser_load_sequence_end(
_parser: &mut yaml_parser_t,
event: *mut yaml_event_t,
event: yaml_event_t,
document: &mut yaml_document_t,
ctx: &mut Vec<libc::c_int>,
) -> Result<(), ()> {
) -> Result<(), ComposerError> {
__assert!(!ctx.is_empty());
let index: libc::c_int = *ctx.last().unwrap();
__assert!(matches!(
document.nodes[index as usize - 1].data,
YamlNodeData::Sequence { .. }
));
document.nodes[index as usize - 1].end_mark = (*event).end_mark;
document.nodes[index as usize - 1].end_mark = event.end_mark;
_ = ctx.pop();
Ok(())
}
unsafe fn yaml_parser_load_mapping(
parser: &mut yaml_parser_t,
event: &mut yaml_event_t, // TODO: take by value
event: yaml_event_t,
document: &mut yaml_document_t,
ctx: &mut Vec<libc::c_int>,
) -> Result<(), ()> {
let (mut tag, style, anchor) = if let YamlEventData::MappingStart {
anchor, tag, style, ..
} = &event.data
{
(tag.clone(), *style, anchor.clone())
} else {
) -> Result<(), ComposerError> {
let YamlEventData::MappingStart {
anchor,
mut tag,
style,
..
} = event.data
else {
unreachable!()
};
let mut pairs = Vec::with_capacity(16);
let index: libc::c_int;
STACK_LIMIT!(parser, document.nodes)?;
if tag.is_none() || tag.as_deref() == Some("!") {
tag = Some(String::from("tag:yaml.org,2002:map"));
}
@ -388,25 +379,24 @@ unsafe fn yaml_parser_load_mapping(
document.nodes.push(node);
index = document.nodes.len() as libc::c_int;
yaml_parser_register_anchor(parser, document, index, anchor)?;
yaml_parser_load_node_add(parser, document, ctx, index)?;
STACK_LIMIT!(parser, *ctx)?;
yaml_parser_load_node_add(document, ctx, index)?;
ctx.push(index);
Ok(())
}
unsafe fn yaml_parser_load_mapping_end(
_parser: &mut yaml_parser_t,
event: *mut yaml_event_t,
event: yaml_event_t,
document: &mut yaml_document_t,
ctx: &mut Vec<libc::c_int>,
) -> Result<(), ()> {
) -> Result<(), ComposerError> {
__assert!(!ctx.is_empty());
let index: libc::c_int = *ctx.last().unwrap();
__assert!(matches!(
document.nodes[index as usize - 1].data,
YamlNodeData::Mapping { .. }
));
document.nodes[index as usize - 1].end_mark = (*event).end_mark;
document.nodes[index as usize - 1].end_mark = event.end_mark;
_ = ctx.pop();
Ok(())
}

View file

@ -268,17 +268,6 @@ macro_rules! IS_BLANKZ {
};
}
// Size guard used before pushing onto `$stack`.
//
// Evaluates to `Ok(())` while `$stack.len()` is below `libc::c_int::MAX - 1`;
// otherwise stores `YAML_MEMORY_ERROR` in `(*$context).error` and evaluates to
// `Err(())`, so call sites can write `STACK_LIMIT!(ctx, stack)?`.
//
// NOTE(review): the bound presumably exists because element indices are stored
// as `libc::c_int` — confirm.
macro_rules! STACK_LIMIT {
($context:expr, $stack:expr) => {
if $stack.len() < libc::c_int::MAX as usize - 1 {
Ok(())
} else {
(*$context).error = YAML_MEMORY_ERROR;
Err(())
}
};
}
pub(crate) fn vecdeque_starts_with<T: PartialEq + Copy>(
vec: &alloc::collections::VecDeque<T>,
needle: &[T],

File diff suppressed because it is too large Load diff

View file

@ -7,7 +7,7 @@ use crate::yaml::{ptrdiff_t, size_t, YamlTokenData};
use crate::{
libc, yaml_mark_t, yaml_parser_t, yaml_simple_key_t, yaml_token_t, ReaderError, ScannerError,
YAML_DOUBLE_QUOTED_SCALAR_STYLE, YAML_FOLDED_SCALAR_STYLE, YAML_LITERAL_SCALAR_STYLE,
YAML_NO_ERROR, YAML_PLAIN_SCALAR_STYLE, YAML_SINGLE_QUOTED_SCALAR_STYLE,
YAML_PLAIN_SCALAR_STYLE, YAML_SINGLE_QUOTED_SCALAR_STYLE,
};
fn CACHE(parser: &mut yaml_parser_t, length: size_t) -> Result<(), ReaderError> {
@ -99,28 +99,25 @@ fn READ_LINE_STRING(parser: &mut yaml_parser_t, string: &mut String) {
/// An application must not alternate the calls of yaml_parser_scan() with the
/// calls of yaml_parser_parse() or yaml_parser_load(). Doing this will break
/// the parser.
pub fn yaml_parser_scan(
parser: &mut yaml_parser_t,
token: &mut yaml_token_t,
) -> Result<(), ScannerError> {
*token = yaml_token_t::default();
if parser.stream_end_produced || parser.error != YAML_NO_ERROR {
return Ok(());
pub fn yaml_parser_scan(parser: &mut yaml_parser_t) -> Result<yaml_token_t, ScannerError> {
if parser.stream_end_produced {
return Ok(yaml_token_t {
data: YamlTokenData::StreamEnd,
..Default::default()
});
}
if !parser.token_available {
yaml_parser_fetch_more_tokens(parser)?;
}
if let Some(popped) = parser.tokens.pop_front() {
*token = popped;
if let Some(token) = parser.tokens.pop_front() {
parser.token_available = false;
parser.tokens_parsed = parser.tokens_parsed.force_add(1);
if let YamlTokenData::StreamEnd = &(*token).data {
if let YamlTokenData::StreamEnd = &token.data {
parser.stream_end_produced = true;
}
Ok(())
Ok(token)
} else {
// token_available should have been false
unreachable!()
unreachable!("no more tokens, but stream-end was not produced")
}
}

View file

@ -670,20 +670,6 @@ pub struct yaml_alias_data_t {
#[repr(C)]
#[non_exhaustive]
pub struct yaml_parser_t<'r> {
/// Error type.
pub error: yaml_error_type_t,
/// Error description.
pub problem: Option<&'static str>,
/// The byte about which the problem occurred.
pub problem_offset: size_t,
/// The problematic value (-1 is none).
pub problem_value: libc::c_int,
/// The problem position.
pub problem_mark: yaml_mark_t,
/// The error context.
pub context: Option<&'static str>,
/// The context position.
pub context_mark: yaml_mark_t,
/// Read handler.
pub(crate) read_handler: Option<&'r mut dyn std::io::Read>,
/// Standard (string or file) input data.
@ -743,13 +729,6 @@ pub struct yaml_parser_t<'r> {
impl<'r> Default for yaml_parser_t<'r> {
fn default() -> Self {
Self {
error: Default::default(),
problem: Default::default(),
problem_offset: Default::default(),
problem_value: Default::default(),
problem_mark: Default::default(),
context: Default::default(),
context_mark: Default::default(),
read_handler: None,
input: Default::default(),
eof: Default::default(),