Emitter: Replace yaml_buffer_t with String/Vec

This commit is contained in:
Simon Ask Ulsnes 2024-01-31 09:41:24 +01:00
parent 9f681e6306
commit a4d7fcfbe8
5 changed files with 58 additions and 134 deletions

View file

@ -14,8 +14,7 @@ use core::ptr::{self, addr_of_mut};
const INPUT_RAW_BUFFER_SIZE: usize = 16384;
const INPUT_BUFFER_SIZE: usize = INPUT_RAW_BUFFER_SIZE * 3;
const OUTPUT_BUFFER_SIZE: usize = 16384;
const OUTPUT_RAW_BUFFER_SIZE: usize = OUTPUT_BUFFER_SIZE * 2 + 2;
pub(crate) const OUTPUT_BUFFER_SIZE: usize = 16384;
pub(crate) unsafe fn yaml_malloc(size: size_t) -> *mut libc::c_void {
malloc(size)
@ -129,8 +128,7 @@ pub unsafe fn yaml_emitter_initialize(emitter: *mut yaml_emitter_t) -> Result<()
__assert!(!emitter.is_null());
core::ptr::write(emitter, yaml_emitter_t::default());
let emitter = &mut *emitter;
BUFFER_INIT!(emitter.buffer, OUTPUT_BUFFER_SIZE);
BUFFER_INIT!(emitter.raw_buffer, OUTPUT_RAW_BUFFER_SIZE);
emitter.buffer.reserve(OUTPUT_BUFFER_SIZE);
emitter.states.reserve(16);
emitter.events.reserve(16);
emitter.indents.reserve(16);
@ -140,8 +138,8 @@ pub unsafe fn yaml_emitter_initialize(emitter: *mut yaml_emitter_t) -> Result<()
/// Destroy an emitter.
pub unsafe fn yaml_emitter_delete(emitter: &mut yaml_emitter_t) {
BUFFER_DEL!(emitter.buffer);
BUFFER_DEL!(emitter.raw_buffer);
emitter.buffer.clear();
emitter.raw_buffer.clear();
emitter.states.clear();
while let Some(mut event) = emitter.events.pop_front() {
yaml_event_delete(&mut event);
@ -153,7 +151,7 @@ pub unsafe fn yaml_emitter_delete(emitter: &mut yaml_emitter_t) {
unsafe fn yaml_string_write_handler(
data: *mut libc::c_void,
buffer: *mut libc::c_uchar,
buffer: *const libc::c_uchar,
size: size_t,
) -> libc::c_int {
let emitter = &mut *(data as *mut yaml_emitter_t);

View file

@ -47,7 +47,7 @@ pub(crate) unsafe fn unsafe_main(
}
let mut emitter = emitter.assume_init();
unsafe fn write_to_stdio(data: *mut c_void, buffer: *mut u8, size: u64) -> i32 {
unsafe fn write_to_stdio(data: *mut c_void, buffer: *const u8, size: u64) -> i32 {
let stdout: *mut &mut dyn Write = data.cast();
let bytes = slice::from_raw_parts(buffer.cast(), size as usize);
match (*stdout).write(bytes) {

View file

@ -1,13 +1,14 @@
use alloc::string::String;
use crate::api::OUTPUT_BUFFER_SIZE;
use crate::macros::{
is_alpha, is_ascii, is_blank, is_blankz, is_bom, is_break, is_printable, is_space,
};
use crate::ops::{ForceAdd as _, ForceMul as _};
use crate::yaml::{size_t, yaml_buffer_t, yaml_char_t, YamlEventData};
use crate::yaml::{size_t, yaml_char_t, YamlEventData};
use crate::{
libc, yaml_emitter_flush, yaml_emitter_t, yaml_event_delete, yaml_event_t, yaml_scalar_style_t,
yaml_tag_directive_t, yaml_version_directive_t, PointerExt, YAML_ANY_BREAK, YAML_ANY_ENCODING,
yaml_tag_directive_t, yaml_version_directive_t, YAML_ANY_BREAK, YAML_ANY_ENCODING,
YAML_ANY_SCALAR_STYLE, YAML_CRLN_BREAK, YAML_CR_BREAK, YAML_DOUBLE_QUOTED_SCALAR_STYLE,
YAML_EMITTER_ERROR, YAML_EMIT_BLOCK_MAPPING_FIRST_KEY_STATE, YAML_EMIT_BLOCK_MAPPING_KEY_STATE,
YAML_EMIT_BLOCK_MAPPING_SIMPLE_VALUE_STATE, YAML_EMIT_BLOCK_MAPPING_VALUE_STATE,
@ -24,7 +25,7 @@ use crate::{
use core::ptr::{self};
unsafe fn FLUSH(emitter: &mut yaml_emitter_t) -> Result<(), ()> {
if emitter.buffer.pointer.wrapping_offset(5_isize) < emitter.buffer.end {
if emitter.buffer.len() < OUTPUT_BUFFER_SIZE - 5 {
Ok(())
} else {
yaml_emitter_flush(emitter)
@ -33,10 +34,8 @@ unsafe fn FLUSH(emitter: &mut yaml_emitter_t) -> Result<(), ()> {
unsafe fn PUT(emitter: &mut yaml_emitter_t, value: u8) -> Result<(), ()> {
FLUSH(emitter)?;
let p = &mut emitter.buffer.pointer;
let old_p = *p;
*p = (*p).wrapping_offset(1);
*old_p = value;
let ch = char::try_from(value).expect("invalid char");
emitter.buffer.push(ch);
emitter.column += 1;
Ok(())
}
@ -44,24 +43,11 @@ unsafe fn PUT(emitter: &mut yaml_emitter_t, value: u8) -> Result<(), ()> {
unsafe fn PUT_BREAK(emitter: &mut yaml_emitter_t) -> Result<(), ()> {
FLUSH(emitter)?;
if emitter.line_break == YAML_CR_BREAK {
let p = &mut emitter.buffer.pointer;
let old_p = *p;
*p = (*p).wrapping_offset(1);
*old_p = b'\r';
emitter.buffer.push('\r');
} else if emitter.line_break == YAML_LN_BREAK {
let p = &mut emitter.buffer.pointer;
let old_p = *p;
*p = (*p).wrapping_offset(1);
*old_p = b'\n';
emitter.buffer.push('\n');
} else if emitter.line_break == YAML_CRLN_BREAK {
let p = &mut emitter.buffer.pointer;
let old_p = *p;
*p = (*p).wrapping_offset(1);
*old_p = b'\r';
let p = &mut emitter.buffer.pointer;
let old_p = *p;
*p = (*p).wrapping_offset(1);
*old_p = b'\n';
emitter.buffer.push_str("\r\n");
};
emitter.column = 0;
emitter.line += 1;
@ -77,17 +63,9 @@ unsafe fn WRITE_STR(emitter: &mut yaml_emitter_t, string: &str) -> Result<(), ()
Ok(())
}
unsafe fn append_to_buffer(buffer: &mut yaml_buffer_t<u8>, to_append: &[u8]) {
debug_assert!(buffer.end.c_offset_from(buffer.pointer) >= to_append.len() as isize);
core::slice::from_raw_parts_mut(buffer.pointer, to_append.len()).copy_from_slice(to_append);
buffer.pointer = buffer.pointer.wrapping_add(to_append.len());
}
unsafe fn WRITE_CHAR(emitter: &mut yaml_emitter_t, ch: char) -> Result<(), ()> {
FLUSH(emitter)?; // Note: this ensures at least 5 bytes can be written, and we write at most 4.
let mut encoded = [0u8; 4];
let encoded = ch.encode_utf8(&mut encoded);
append_to_buffer(&mut emitter.buffer, encoded.as_bytes());
FLUSH(emitter)?;
emitter.buffer.push(ch);
emitter.column += 1;
Ok(())
}
@ -1252,18 +1230,7 @@ unsafe fn yaml_emitter_analyze_event(
unsafe fn yaml_emitter_write_bom(emitter: &mut yaml_emitter_t) -> Result<(), ()> {
FLUSH(emitter)?;
let p = &mut emitter.buffer.pointer;
let old_pointer = *p;
*p = (*p).wrapping_offset(1);
*old_pointer = b'\xEF';
let p = &mut emitter.buffer.pointer;
let old_pointer = *p;
*p = (*p).wrapping_offset(1);
*old_pointer = b'\xBB';
let p = &mut emitter.buffer.pointer;
let old_pointer = *p;
*p = (*p).wrapping_offset(1);
*old_pointer = b'\xBF';
emitter.buffer.push('\u{feff}');
Ok(())
}

View file

@ -1,8 +1,7 @@
use crate::ops::ForceAdd as _;
use crate::yaml::size_t;
use crate::yaml_encoding_t::YAML_UTF16BE_ENCODING;
use crate::{
libc, yaml_emitter_t, PointerExt, YAML_ANY_ENCODING, YAML_UTF16LE_ENCODING, YAML_UTF8_ENCODING,
YAML_WRITER_ERROR,
yaml_emitter_t, YAML_ANY_ENCODING, YAML_UTF16LE_ENCODING, YAML_UTF8_ENCODING, YAML_WRITER_ERROR,
};
unsafe fn yaml_emitter_set_writer_error(
@ -18,100 +17,50 @@ unsafe fn yaml_emitter_set_writer_error(
pub unsafe fn yaml_emitter_flush(emitter: &mut yaml_emitter_t) -> Result<(), ()> {
__assert!((emitter.write_handler).is_some());
__assert!(emitter.encoding != YAML_ANY_ENCODING);
emitter.buffer.last = emitter.buffer.pointer;
emitter.buffer.pointer = emitter.buffer.start;
if emitter.buffer.start == emitter.buffer.last {
if emitter.buffer.is_empty() {
return Ok(());
}
if emitter.encoding == YAML_UTF8_ENCODING {
let to_emit = emitter.buffer.as_bytes();
if emitter.write_handler.expect("non-null function pointer")(
emitter.write_handler_data,
emitter.buffer.start,
emitter.buffer.last.c_offset_from(emitter.buffer.start) as size_t,
to_emit.as_ptr(),
to_emit.len() as size_t,
) != 0
{
emitter.buffer.last = emitter.buffer.start;
emitter.buffer.pointer = emitter.buffer.start;
emitter.buffer.clear();
return Ok(());
} else {
return yaml_emitter_set_writer_error(emitter, "write error");
}
}
let low: libc::c_int = if emitter.encoding == YAML_UTF16LE_ENCODING {
0
} else {
1
let big_endian = match emitter.encoding {
YAML_ANY_ENCODING | YAML_UTF8_ENCODING => unreachable!("unhandled encoding"),
YAML_UTF16LE_ENCODING => false,
YAML_UTF16BE_ENCODING => true,
};
let high: libc::c_int = if emitter.encoding == YAML_UTF16LE_ENCODING {
1
for ch in emitter.buffer.encode_utf16() {
let bytes = if big_endian {
ch.to_be_bytes()
} else {
0
ch.to_le_bytes()
};
while emitter.buffer.pointer != emitter.buffer.last {
let mut octet: libc::c_uchar;
let mut value: libc::c_uint;
let mut k: size_t;
octet = *emitter.buffer.pointer;
let width: libc::c_uint = if octet & 0x80 == 0 {
1
} else if octet & 0xE0 == 0xC0 {
2
} else if octet & 0xF0 == 0xE0 {
3
} else if octet & 0xF8 == 0xF0 {
4
} else {
0
} as libc::c_uint;
value = if octet & 0x80 == 0 {
octet & 0x7F
} else if octet & 0xE0 == 0xC0 {
octet & 0x1F
} else if octet & 0xF0 == 0xE0 {
octet & 0xF
} else if octet & 0xF8 == 0xF0 {
octet & 0x7
} else {
0
} as libc::c_uint;
k = 1_u64;
while k < width as libc::c_ulong {
octet = *emitter.buffer.pointer.wrapping_offset(k as isize);
value = (value << 6).force_add((octet & 0x3F) as libc::c_uint);
k = k.force_add(1);
}
emitter.buffer.pointer = emitter.buffer.pointer.wrapping_offset(width as isize);
if value < 0x10000 {
*emitter.raw_buffer.last.wrapping_offset(high as isize) = (value >> 8) as libc::c_uchar;
*emitter.raw_buffer.last.wrapping_offset(low as isize) =
(value & 0xFF) as libc::c_uchar;
emitter.raw_buffer.last = emitter.raw_buffer.last.wrapping_offset(2_isize);
} else {
value = value.wrapping_sub(0x10000);
*emitter.raw_buffer.last.wrapping_offset(high as isize) =
0xD8_u32.force_add(value >> 18) as libc::c_uchar;
*emitter.raw_buffer.last.wrapping_offset(low as isize) =
(value >> 10 & 0xFF) as libc::c_uchar;
*emitter.raw_buffer.last.wrapping_offset((high + 2) as isize) =
0xDC_u32.force_add(value >> 8 & 0xFF) as libc::c_uchar;
*emitter.raw_buffer.last.wrapping_offset((low + 2) as isize) =
(value & 0xFF) as libc::c_uchar;
emitter.raw_buffer.last = emitter.raw_buffer.last.wrapping_offset(4_isize);
}
emitter.raw_buffer.extend(bytes);
}
let to_emit = emitter.raw_buffer.as_slice();
if emitter.write_handler.expect("non-null function pointer")(
emitter.write_handler_data,
emitter.raw_buffer.start,
emitter
.raw_buffer
.last
.c_offset_from(emitter.raw_buffer.start) as size_t,
to_emit.as_ptr(),
to_emit.len() as size_t,
) != 0
{
emitter.buffer.last = emitter.buffer.start;
emitter.buffer.pointer = emitter.buffer.start;
emitter.raw_buffer.last = emitter.raw_buffer.start;
emitter.raw_buffer.pointer = emitter.raw_buffer.start;
emitter.buffer.clear();
emitter.raw_buffer.clear();
Ok(())
} else {
yaml_emitter_set_writer_error(emitter, "write error")

View file

@ -715,10 +715,15 @@ pub struct yaml_parser_t {
/// EOF flag
pub(crate) eof: bool,
/// The working buffer.
///
/// This always contains valid UTF-8.
pub(crate) buffer: yaml_buffer_t<yaml_char_t>,
/// The number of unread characters in the buffer.
pub(crate) unread: size_t,
/// The raw buffer.
///
/// This is the raw unchecked input from the read handler (for example, it
/// may be UTF-16 encoded).
pub(crate) raw_buffer: yaml_buffer_t<libc::c_uchar>,
/// The input encoding.
pub(crate) encoding: yaml_encoding_t,
@ -856,7 +861,7 @@ impl Default for unnamed_yaml_parser_t_input_string {
/// On success, the handler should return 1. If the handler failed, the returned
/// value should be 0.
pub type yaml_write_handler_t =
unsafe fn(data: *mut libc::c_void, buffer: *mut libc::c_uchar, size: size_t) -> libc::c_int;
unsafe fn(data: *mut libc::c_void, buffer: *const libc::c_uchar, size: size_t) -> libc::c_int;
/// The emitter states.
#[derive(Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
@ -931,9 +936,14 @@ pub struct yaml_emitter_t {
/// Standard (string or file) output data.
pub(crate) output: unnamed_yaml_emitter_t_output_string,
/// The working buffer.
pub(crate) buffer: yaml_buffer_t<yaml_char_t>,
///
/// This always contains valid UTF-8.
pub(crate) buffer: String,
/// The raw buffer.
pub(crate) raw_buffer: yaml_buffer_t<libc::c_uchar>,
///
/// This contains the output in the encoded format, so for example it may be
/// UTF-16 encoded.
pub(crate) raw_buffer: Vec<u8>,
/// The stream encoding.
pub(crate) encoding: yaml_encoding_t,
/// Whether the output is in the canonical style.