Emitter: Replace yaml_buffer_t with String/Vec

This commit is contained in:
Simon Ask Ulsnes 2024-01-31 09:41:24 +01:00
parent 9f681e6306
commit a4d7fcfbe8
5 changed files with 58 additions and 134 deletions

View file

@ -14,8 +14,7 @@ use core::ptr::{self, addr_of_mut};
const INPUT_RAW_BUFFER_SIZE: usize = 16384; const INPUT_RAW_BUFFER_SIZE: usize = 16384;
const INPUT_BUFFER_SIZE: usize = INPUT_RAW_BUFFER_SIZE * 3; const INPUT_BUFFER_SIZE: usize = INPUT_RAW_BUFFER_SIZE * 3;
const OUTPUT_BUFFER_SIZE: usize = 16384; pub(crate) const OUTPUT_BUFFER_SIZE: usize = 16384;
const OUTPUT_RAW_BUFFER_SIZE: usize = OUTPUT_BUFFER_SIZE * 2 + 2;
pub(crate) unsafe fn yaml_malloc(size: size_t) -> *mut libc::c_void { pub(crate) unsafe fn yaml_malloc(size: size_t) -> *mut libc::c_void {
malloc(size) malloc(size)
@ -129,8 +128,7 @@ pub unsafe fn yaml_emitter_initialize(emitter: *mut yaml_emitter_t) -> Result<()
__assert!(!emitter.is_null()); __assert!(!emitter.is_null());
core::ptr::write(emitter, yaml_emitter_t::default()); core::ptr::write(emitter, yaml_emitter_t::default());
let emitter = &mut *emitter; let emitter = &mut *emitter;
BUFFER_INIT!(emitter.buffer, OUTPUT_BUFFER_SIZE); emitter.buffer.reserve(OUTPUT_BUFFER_SIZE);
BUFFER_INIT!(emitter.raw_buffer, OUTPUT_RAW_BUFFER_SIZE);
emitter.states.reserve(16); emitter.states.reserve(16);
emitter.events.reserve(16); emitter.events.reserve(16);
emitter.indents.reserve(16); emitter.indents.reserve(16);
@ -140,8 +138,8 @@ pub unsafe fn yaml_emitter_initialize(emitter: *mut yaml_emitter_t) -> Result<()
/// Destroy an emitter. /// Destroy an emitter.
pub unsafe fn yaml_emitter_delete(emitter: &mut yaml_emitter_t) { pub unsafe fn yaml_emitter_delete(emitter: &mut yaml_emitter_t) {
BUFFER_DEL!(emitter.buffer); emitter.buffer.clear();
BUFFER_DEL!(emitter.raw_buffer); emitter.raw_buffer.clear();
emitter.states.clear(); emitter.states.clear();
while let Some(mut event) = emitter.events.pop_front() { while let Some(mut event) = emitter.events.pop_front() {
yaml_event_delete(&mut event); yaml_event_delete(&mut event);
@ -153,7 +151,7 @@ pub unsafe fn yaml_emitter_delete(emitter: &mut yaml_emitter_t) {
unsafe fn yaml_string_write_handler( unsafe fn yaml_string_write_handler(
data: *mut libc::c_void, data: *mut libc::c_void,
buffer: *mut libc::c_uchar, buffer: *const libc::c_uchar,
size: size_t, size: size_t,
) -> libc::c_int { ) -> libc::c_int {
let emitter = &mut *(data as *mut yaml_emitter_t); let emitter = &mut *(data as *mut yaml_emitter_t);

View file

@ -47,7 +47,7 @@ pub(crate) unsafe fn unsafe_main(
} }
let mut emitter = emitter.assume_init(); let mut emitter = emitter.assume_init();
unsafe fn write_to_stdio(data: *mut c_void, buffer: *mut u8, size: u64) -> i32 { unsafe fn write_to_stdio(data: *mut c_void, buffer: *const u8, size: u64) -> i32 {
let stdout: *mut &mut dyn Write = data.cast(); let stdout: *mut &mut dyn Write = data.cast();
let bytes = slice::from_raw_parts(buffer.cast(), size as usize); let bytes = slice::from_raw_parts(buffer.cast(), size as usize);
match (*stdout).write(bytes) { match (*stdout).write(bytes) {

View file

@ -1,13 +1,14 @@
use alloc::string::String; use alloc::string::String;
use crate::api::OUTPUT_BUFFER_SIZE;
use crate::macros::{ use crate::macros::{
is_alpha, is_ascii, is_blank, is_blankz, is_bom, is_break, is_printable, is_space, is_alpha, is_ascii, is_blank, is_blankz, is_bom, is_break, is_printable, is_space,
}; };
use crate::ops::{ForceAdd as _, ForceMul as _}; use crate::ops::{ForceAdd as _, ForceMul as _};
use crate::yaml::{size_t, yaml_buffer_t, yaml_char_t, YamlEventData}; use crate::yaml::{size_t, yaml_char_t, YamlEventData};
use crate::{ use crate::{
libc, yaml_emitter_flush, yaml_emitter_t, yaml_event_delete, yaml_event_t, yaml_scalar_style_t, libc, yaml_emitter_flush, yaml_emitter_t, yaml_event_delete, yaml_event_t, yaml_scalar_style_t,
yaml_tag_directive_t, yaml_version_directive_t, PointerExt, YAML_ANY_BREAK, YAML_ANY_ENCODING, yaml_tag_directive_t, yaml_version_directive_t, YAML_ANY_BREAK, YAML_ANY_ENCODING,
YAML_ANY_SCALAR_STYLE, YAML_CRLN_BREAK, YAML_CR_BREAK, YAML_DOUBLE_QUOTED_SCALAR_STYLE, YAML_ANY_SCALAR_STYLE, YAML_CRLN_BREAK, YAML_CR_BREAK, YAML_DOUBLE_QUOTED_SCALAR_STYLE,
YAML_EMITTER_ERROR, YAML_EMIT_BLOCK_MAPPING_FIRST_KEY_STATE, YAML_EMIT_BLOCK_MAPPING_KEY_STATE, YAML_EMITTER_ERROR, YAML_EMIT_BLOCK_MAPPING_FIRST_KEY_STATE, YAML_EMIT_BLOCK_MAPPING_KEY_STATE,
YAML_EMIT_BLOCK_MAPPING_SIMPLE_VALUE_STATE, YAML_EMIT_BLOCK_MAPPING_VALUE_STATE, YAML_EMIT_BLOCK_MAPPING_SIMPLE_VALUE_STATE, YAML_EMIT_BLOCK_MAPPING_VALUE_STATE,
@ -24,7 +25,7 @@ use crate::{
use core::ptr::{self}; use core::ptr::{self};
unsafe fn FLUSH(emitter: &mut yaml_emitter_t) -> Result<(), ()> { unsafe fn FLUSH(emitter: &mut yaml_emitter_t) -> Result<(), ()> {
if emitter.buffer.pointer.wrapping_offset(5_isize) < emitter.buffer.end { if emitter.buffer.len() < OUTPUT_BUFFER_SIZE - 5 {
Ok(()) Ok(())
} else { } else {
yaml_emitter_flush(emitter) yaml_emitter_flush(emitter)
@ -33,10 +34,8 @@ unsafe fn FLUSH(emitter: &mut yaml_emitter_t) -> Result<(), ()> {
unsafe fn PUT(emitter: &mut yaml_emitter_t, value: u8) -> Result<(), ()> { unsafe fn PUT(emitter: &mut yaml_emitter_t, value: u8) -> Result<(), ()> {
FLUSH(emitter)?; FLUSH(emitter)?;
let p = &mut emitter.buffer.pointer; let ch = char::try_from(value).expect("invalid char");
let old_p = *p; emitter.buffer.push(ch);
*p = (*p).wrapping_offset(1);
*old_p = value;
emitter.column += 1; emitter.column += 1;
Ok(()) Ok(())
} }
@ -44,24 +43,11 @@ unsafe fn PUT(emitter: &mut yaml_emitter_t, value: u8) -> Result<(), ()> {
unsafe fn PUT_BREAK(emitter: &mut yaml_emitter_t) -> Result<(), ()> { unsafe fn PUT_BREAK(emitter: &mut yaml_emitter_t) -> Result<(), ()> {
FLUSH(emitter)?; FLUSH(emitter)?;
if emitter.line_break == YAML_CR_BREAK { if emitter.line_break == YAML_CR_BREAK {
let p = &mut emitter.buffer.pointer; emitter.buffer.push('\r');
let old_p = *p;
*p = (*p).wrapping_offset(1);
*old_p = b'\r';
} else if emitter.line_break == YAML_LN_BREAK { } else if emitter.line_break == YAML_LN_BREAK {
let p = &mut emitter.buffer.pointer; emitter.buffer.push('\n');
let old_p = *p;
*p = (*p).wrapping_offset(1);
*old_p = b'\n';
} else if emitter.line_break == YAML_CRLN_BREAK { } else if emitter.line_break == YAML_CRLN_BREAK {
let p = &mut emitter.buffer.pointer; emitter.buffer.push_str("\r\n");
let old_p = *p;
*p = (*p).wrapping_offset(1);
*old_p = b'\r';
let p = &mut emitter.buffer.pointer;
let old_p = *p;
*p = (*p).wrapping_offset(1);
*old_p = b'\n';
}; };
emitter.column = 0; emitter.column = 0;
emitter.line += 1; emitter.line += 1;
@ -77,17 +63,9 @@ unsafe fn WRITE_STR(emitter: &mut yaml_emitter_t, string: &str) -> Result<(), ()
Ok(()) Ok(())
} }
unsafe fn append_to_buffer(buffer: &mut yaml_buffer_t<u8>, to_append: &[u8]) {
debug_assert!(buffer.end.c_offset_from(buffer.pointer) >= to_append.len() as isize);
core::slice::from_raw_parts_mut(buffer.pointer, to_append.len()).copy_from_slice(to_append);
buffer.pointer = buffer.pointer.wrapping_add(to_append.len());
}
unsafe fn WRITE_CHAR(emitter: &mut yaml_emitter_t, ch: char) -> Result<(), ()> { unsafe fn WRITE_CHAR(emitter: &mut yaml_emitter_t, ch: char) -> Result<(), ()> {
FLUSH(emitter)?; // Note: this ensures at least 5 bytes can be written, and we write at most 4. FLUSH(emitter)?;
let mut encoded = [0u8; 4]; emitter.buffer.push(ch);
let encoded = ch.encode_utf8(&mut encoded);
append_to_buffer(&mut emitter.buffer, encoded.as_bytes());
emitter.column += 1; emitter.column += 1;
Ok(()) Ok(())
} }
@ -1252,18 +1230,7 @@ unsafe fn yaml_emitter_analyze_event(
unsafe fn yaml_emitter_write_bom(emitter: &mut yaml_emitter_t) -> Result<(), ()> { unsafe fn yaml_emitter_write_bom(emitter: &mut yaml_emitter_t) -> Result<(), ()> {
FLUSH(emitter)?; FLUSH(emitter)?;
let p = &mut emitter.buffer.pointer; emitter.buffer.push('\u{feff}');
let old_pointer = *p;
*p = (*p).wrapping_offset(1);
*old_pointer = b'\xEF';
let p = &mut emitter.buffer.pointer;
let old_pointer = *p;
*p = (*p).wrapping_offset(1);
*old_pointer = b'\xBB';
let p = &mut emitter.buffer.pointer;
let old_pointer = *p;
*p = (*p).wrapping_offset(1);
*old_pointer = b'\xBF';
Ok(()) Ok(())
} }

View file

@ -1,8 +1,7 @@
use crate::ops::ForceAdd as _;
use crate::yaml::size_t; use crate::yaml::size_t;
use crate::yaml_encoding_t::YAML_UTF16BE_ENCODING;
use crate::{ use crate::{
libc, yaml_emitter_t, PointerExt, YAML_ANY_ENCODING, YAML_UTF16LE_ENCODING, YAML_UTF8_ENCODING, yaml_emitter_t, YAML_ANY_ENCODING, YAML_UTF16LE_ENCODING, YAML_UTF8_ENCODING, YAML_WRITER_ERROR,
YAML_WRITER_ERROR,
}; };
unsafe fn yaml_emitter_set_writer_error( unsafe fn yaml_emitter_set_writer_error(
@ -18,100 +17,50 @@ unsafe fn yaml_emitter_set_writer_error(
pub unsafe fn yaml_emitter_flush(emitter: &mut yaml_emitter_t) -> Result<(), ()> { pub unsafe fn yaml_emitter_flush(emitter: &mut yaml_emitter_t) -> Result<(), ()> {
__assert!((emitter.write_handler).is_some()); __assert!((emitter.write_handler).is_some());
__assert!(emitter.encoding != YAML_ANY_ENCODING); __assert!(emitter.encoding != YAML_ANY_ENCODING);
emitter.buffer.last = emitter.buffer.pointer;
emitter.buffer.pointer = emitter.buffer.start; if emitter.buffer.is_empty() {
if emitter.buffer.start == emitter.buffer.last {
return Ok(()); return Ok(());
} }
if emitter.encoding == YAML_UTF8_ENCODING { if emitter.encoding == YAML_UTF8_ENCODING {
let to_emit = emitter.buffer.as_bytes();
if emitter.write_handler.expect("non-null function pointer")( if emitter.write_handler.expect("non-null function pointer")(
emitter.write_handler_data, emitter.write_handler_data,
emitter.buffer.start, to_emit.as_ptr(),
emitter.buffer.last.c_offset_from(emitter.buffer.start) as size_t, to_emit.len() as size_t,
) != 0 ) != 0
{ {
emitter.buffer.last = emitter.buffer.start; emitter.buffer.clear();
emitter.buffer.pointer = emitter.buffer.start;
return Ok(()); return Ok(());
} else { } else {
return yaml_emitter_set_writer_error(emitter, "write error"); return yaml_emitter_set_writer_error(emitter, "write error");
} }
} }
let low: libc::c_int = if emitter.encoding == YAML_UTF16LE_ENCODING {
0 let big_endian = match emitter.encoding {
} else { YAML_ANY_ENCODING | YAML_UTF8_ENCODING => unreachable!("unhandled encoding"),
1 YAML_UTF16LE_ENCODING => false,
YAML_UTF16BE_ENCODING => true,
}; };
let high: libc::c_int = if emitter.encoding == YAML_UTF16LE_ENCODING {
1 for ch in emitter.buffer.encode_utf16() {
} else { let bytes = if big_endian {
0 ch.to_be_bytes()
};
while emitter.buffer.pointer != emitter.buffer.last {
let mut octet: libc::c_uchar;
let mut value: libc::c_uint;
let mut k: size_t;
octet = *emitter.buffer.pointer;
let width: libc::c_uint = if octet & 0x80 == 0 {
1
} else if octet & 0xE0 == 0xC0 {
2
} else if octet & 0xF0 == 0xE0 {
3
} else if octet & 0xF8 == 0xF0 {
4
} else { } else {
0 ch.to_le_bytes()
} as libc::c_uint; };
value = if octet & 0x80 == 0 { emitter.raw_buffer.extend(bytes);
octet & 0x7F
} else if octet & 0xE0 == 0xC0 {
octet & 0x1F
} else if octet & 0xF0 == 0xE0 {
octet & 0xF
} else if octet & 0xF8 == 0xF0 {
octet & 0x7
} else {
0
} as libc::c_uint;
k = 1_u64;
while k < width as libc::c_ulong {
octet = *emitter.buffer.pointer.wrapping_offset(k as isize);
value = (value << 6).force_add((octet & 0x3F) as libc::c_uint);
k = k.force_add(1);
}
emitter.buffer.pointer = emitter.buffer.pointer.wrapping_offset(width as isize);
if value < 0x10000 {
*emitter.raw_buffer.last.wrapping_offset(high as isize) = (value >> 8) as libc::c_uchar;
*emitter.raw_buffer.last.wrapping_offset(low as isize) =
(value & 0xFF) as libc::c_uchar;
emitter.raw_buffer.last = emitter.raw_buffer.last.wrapping_offset(2_isize);
} else {
value = value.wrapping_sub(0x10000);
*emitter.raw_buffer.last.wrapping_offset(high as isize) =
0xD8_u32.force_add(value >> 18) as libc::c_uchar;
*emitter.raw_buffer.last.wrapping_offset(low as isize) =
(value >> 10 & 0xFF) as libc::c_uchar;
*emitter.raw_buffer.last.wrapping_offset((high + 2) as isize) =
0xDC_u32.force_add(value >> 8 & 0xFF) as libc::c_uchar;
*emitter.raw_buffer.last.wrapping_offset((low + 2) as isize) =
(value & 0xFF) as libc::c_uchar;
emitter.raw_buffer.last = emitter.raw_buffer.last.wrapping_offset(4_isize);
}
} }
let to_emit = emitter.raw_buffer.as_slice();
if emitter.write_handler.expect("non-null function pointer")( if emitter.write_handler.expect("non-null function pointer")(
emitter.write_handler_data, emitter.write_handler_data,
emitter.raw_buffer.start, to_emit.as_ptr(),
emitter to_emit.len() as size_t,
.raw_buffer
.last
.c_offset_from(emitter.raw_buffer.start) as size_t,
) != 0 ) != 0
{ {
emitter.buffer.last = emitter.buffer.start; emitter.buffer.clear();
emitter.buffer.pointer = emitter.buffer.start; emitter.raw_buffer.clear();
emitter.raw_buffer.last = emitter.raw_buffer.start;
emitter.raw_buffer.pointer = emitter.raw_buffer.start;
Ok(()) Ok(())
} else { } else {
yaml_emitter_set_writer_error(emitter, "write error") yaml_emitter_set_writer_error(emitter, "write error")

View file

@ -715,10 +715,15 @@ pub struct yaml_parser_t {
/// EOF flag /// EOF flag
pub(crate) eof: bool, pub(crate) eof: bool,
/// The working buffer. /// The working buffer.
///
/// This always contains valid UTF-8.
pub(crate) buffer: yaml_buffer_t<yaml_char_t>, pub(crate) buffer: yaml_buffer_t<yaml_char_t>,
/// The number of unread characters in the buffer. /// The number of unread characters in the buffer.
pub(crate) unread: size_t, pub(crate) unread: size_t,
/// The raw buffer. /// The raw buffer.
///
/// This is the raw unchecked input from the read handler (for example, it
/// may be UTF-16 encoded).
pub(crate) raw_buffer: yaml_buffer_t<libc::c_uchar>, pub(crate) raw_buffer: yaml_buffer_t<libc::c_uchar>,
/// The input encoding. /// The input encoding.
pub(crate) encoding: yaml_encoding_t, pub(crate) encoding: yaml_encoding_t,
@ -856,7 +861,7 @@ impl Default for unnamed_yaml_parser_t_input_string {
/// On success, the handler should return 1. If the handler failed, the returned /// On success, the handler should return 1. If the handler failed, the returned
/// value should be 0. /// value should be 0.
pub type yaml_write_handler_t = pub type yaml_write_handler_t =
unsafe fn(data: *mut libc::c_void, buffer: *mut libc::c_uchar, size: size_t) -> libc::c_int; unsafe fn(data: *mut libc::c_void, buffer: *const libc::c_uchar, size: size_t) -> libc::c_int;
/// The emitter states. /// The emitter states.
#[derive(Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] #[derive(Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
@ -931,9 +936,14 @@ pub struct yaml_emitter_t {
/// Standard (string or file) output data. /// Standard (string or file) output data.
pub(crate) output: unnamed_yaml_emitter_t_output_string, pub(crate) output: unnamed_yaml_emitter_t_output_string,
/// The working buffer. /// The working buffer.
pub(crate) buffer: yaml_buffer_t<yaml_char_t>, ///
/// This always contains valid UTF-8.
pub(crate) buffer: String,
/// The raw buffer. /// The raw buffer.
pub(crate) raw_buffer: yaml_buffer_t<libc::c_uchar>, ///
/// This contains the output in the encoded format, so for example it may be
/// UTF-16 encoded.
pub(crate) raw_buffer: Vec<u8>,
/// The stream encoding. /// The stream encoding.
pub(crate) encoding: yaml_encoding_t, pub(crate) encoding: yaml_encoding_t,
/// If the output is in the canonical style? /// If the output is in the canonical style?