mirror of
https://github.com/simonask/libyaml-safer
synced 2024-11-26 05:10:20 +00:00
Emitter: Replace yaml_buffer_t with String/Vec
This commit is contained in:
parent
9f681e6306
commit
a4d7fcfbe8
5 changed files with 58 additions and 134 deletions
12
src/api.rs
12
src/api.rs
|
@ -14,8 +14,7 @@ use core::ptr::{self, addr_of_mut};
|
|||
|
||||
const INPUT_RAW_BUFFER_SIZE: usize = 16384;
|
||||
const INPUT_BUFFER_SIZE: usize = INPUT_RAW_BUFFER_SIZE * 3;
|
||||
const OUTPUT_BUFFER_SIZE: usize = 16384;
|
||||
const OUTPUT_RAW_BUFFER_SIZE: usize = OUTPUT_BUFFER_SIZE * 2 + 2;
|
||||
pub(crate) const OUTPUT_BUFFER_SIZE: usize = 16384;
|
||||
|
||||
pub(crate) unsafe fn yaml_malloc(size: size_t) -> *mut libc::c_void {
|
||||
malloc(size)
|
||||
|
@ -129,8 +128,7 @@ pub unsafe fn yaml_emitter_initialize(emitter: *mut yaml_emitter_t) -> Result<()
|
|||
__assert!(!emitter.is_null());
|
||||
core::ptr::write(emitter, yaml_emitter_t::default());
|
||||
let emitter = &mut *emitter;
|
||||
BUFFER_INIT!(emitter.buffer, OUTPUT_BUFFER_SIZE);
|
||||
BUFFER_INIT!(emitter.raw_buffer, OUTPUT_RAW_BUFFER_SIZE);
|
||||
emitter.buffer.reserve(OUTPUT_BUFFER_SIZE);
|
||||
emitter.states.reserve(16);
|
||||
emitter.events.reserve(16);
|
||||
emitter.indents.reserve(16);
|
||||
|
@ -140,8 +138,8 @@ pub unsafe fn yaml_emitter_initialize(emitter: *mut yaml_emitter_t) -> Result<()
|
|||
|
||||
/// Destroy an emitter.
|
||||
pub unsafe fn yaml_emitter_delete(emitter: &mut yaml_emitter_t) {
|
||||
BUFFER_DEL!(emitter.buffer);
|
||||
BUFFER_DEL!(emitter.raw_buffer);
|
||||
emitter.buffer.clear();
|
||||
emitter.raw_buffer.clear();
|
||||
emitter.states.clear();
|
||||
while let Some(mut event) = emitter.events.pop_front() {
|
||||
yaml_event_delete(&mut event);
|
||||
|
@ -153,7 +151,7 @@ pub unsafe fn yaml_emitter_delete(emitter: &mut yaml_emitter_t) {
|
|||
|
||||
unsafe fn yaml_string_write_handler(
|
||||
data: *mut libc::c_void,
|
||||
buffer: *mut libc::c_uchar,
|
||||
buffer: *const libc::c_uchar,
|
||||
size: size_t,
|
||||
) -> libc::c_int {
|
||||
let emitter = &mut *(data as *mut yaml_emitter_t);
|
||||
|
|
|
@ -47,7 +47,7 @@ pub(crate) unsafe fn unsafe_main(
|
|||
}
|
||||
let mut emitter = emitter.assume_init();
|
||||
|
||||
unsafe fn write_to_stdio(data: *mut c_void, buffer: *mut u8, size: u64) -> i32 {
|
||||
unsafe fn write_to_stdio(data: *mut c_void, buffer: *const u8, size: u64) -> i32 {
|
||||
let stdout: *mut &mut dyn Write = data.cast();
|
||||
let bytes = slice::from_raw_parts(buffer.cast(), size as usize);
|
||||
match (*stdout).write(bytes) {
|
||||
|
|
|
@ -1,13 +1,14 @@
|
|||
use alloc::string::String;
|
||||
|
||||
use crate::api::OUTPUT_BUFFER_SIZE;
|
||||
use crate::macros::{
|
||||
is_alpha, is_ascii, is_blank, is_blankz, is_bom, is_break, is_printable, is_space,
|
||||
};
|
||||
use crate::ops::{ForceAdd as _, ForceMul as _};
|
||||
use crate::yaml::{size_t, yaml_buffer_t, yaml_char_t, YamlEventData};
|
||||
use crate::yaml::{size_t, yaml_char_t, YamlEventData};
|
||||
use crate::{
|
||||
libc, yaml_emitter_flush, yaml_emitter_t, yaml_event_delete, yaml_event_t, yaml_scalar_style_t,
|
||||
yaml_tag_directive_t, yaml_version_directive_t, PointerExt, YAML_ANY_BREAK, YAML_ANY_ENCODING,
|
||||
yaml_tag_directive_t, yaml_version_directive_t, YAML_ANY_BREAK, YAML_ANY_ENCODING,
|
||||
YAML_ANY_SCALAR_STYLE, YAML_CRLN_BREAK, YAML_CR_BREAK, YAML_DOUBLE_QUOTED_SCALAR_STYLE,
|
||||
YAML_EMITTER_ERROR, YAML_EMIT_BLOCK_MAPPING_FIRST_KEY_STATE, YAML_EMIT_BLOCK_MAPPING_KEY_STATE,
|
||||
YAML_EMIT_BLOCK_MAPPING_SIMPLE_VALUE_STATE, YAML_EMIT_BLOCK_MAPPING_VALUE_STATE,
|
||||
|
@ -24,7 +25,7 @@ use crate::{
|
|||
use core::ptr::{self};
|
||||
|
||||
unsafe fn FLUSH(emitter: &mut yaml_emitter_t) -> Result<(), ()> {
|
||||
if emitter.buffer.pointer.wrapping_offset(5_isize) < emitter.buffer.end {
|
||||
if emitter.buffer.len() < OUTPUT_BUFFER_SIZE - 5 {
|
||||
Ok(())
|
||||
} else {
|
||||
yaml_emitter_flush(emitter)
|
||||
|
@ -33,10 +34,8 @@ unsafe fn FLUSH(emitter: &mut yaml_emitter_t) -> Result<(), ()> {
|
|||
|
||||
unsafe fn PUT(emitter: &mut yaml_emitter_t, value: u8) -> Result<(), ()> {
|
||||
FLUSH(emitter)?;
|
||||
let p = &mut emitter.buffer.pointer;
|
||||
let old_p = *p;
|
||||
*p = (*p).wrapping_offset(1);
|
||||
*old_p = value;
|
||||
let ch = char::try_from(value).expect("invalid char");
|
||||
emitter.buffer.push(ch);
|
||||
emitter.column += 1;
|
||||
Ok(())
|
||||
}
|
||||
|
@ -44,24 +43,11 @@ unsafe fn PUT(emitter: &mut yaml_emitter_t, value: u8) -> Result<(), ()> {
|
|||
unsafe fn PUT_BREAK(emitter: &mut yaml_emitter_t) -> Result<(), ()> {
|
||||
FLUSH(emitter)?;
|
||||
if emitter.line_break == YAML_CR_BREAK {
|
||||
let p = &mut emitter.buffer.pointer;
|
||||
let old_p = *p;
|
||||
*p = (*p).wrapping_offset(1);
|
||||
*old_p = b'\r';
|
||||
emitter.buffer.push('\r');
|
||||
} else if emitter.line_break == YAML_LN_BREAK {
|
||||
let p = &mut emitter.buffer.pointer;
|
||||
let old_p = *p;
|
||||
*p = (*p).wrapping_offset(1);
|
||||
*old_p = b'\n';
|
||||
emitter.buffer.push('\n');
|
||||
} else if emitter.line_break == YAML_CRLN_BREAK {
|
||||
let p = &mut emitter.buffer.pointer;
|
||||
let old_p = *p;
|
||||
*p = (*p).wrapping_offset(1);
|
||||
*old_p = b'\r';
|
||||
let p = &mut emitter.buffer.pointer;
|
||||
let old_p = *p;
|
||||
*p = (*p).wrapping_offset(1);
|
||||
*old_p = b'\n';
|
||||
emitter.buffer.push_str("\r\n");
|
||||
};
|
||||
emitter.column = 0;
|
||||
emitter.line += 1;
|
||||
|
@ -77,17 +63,9 @@ unsafe fn WRITE_STR(emitter: &mut yaml_emitter_t, string: &str) -> Result<(), ()
|
|||
Ok(())
|
||||
}
|
||||
|
||||
unsafe fn append_to_buffer(buffer: &mut yaml_buffer_t<u8>, to_append: &[u8]) {
|
||||
debug_assert!(buffer.end.c_offset_from(buffer.pointer) >= to_append.len() as isize);
|
||||
core::slice::from_raw_parts_mut(buffer.pointer, to_append.len()).copy_from_slice(to_append);
|
||||
buffer.pointer = buffer.pointer.wrapping_add(to_append.len());
|
||||
}
|
||||
|
||||
unsafe fn WRITE_CHAR(emitter: &mut yaml_emitter_t, ch: char) -> Result<(), ()> {
|
||||
FLUSH(emitter)?; // Note: this ensures at least 5 bytes can be written, and we write at most 4.
|
||||
let mut encoded = [0u8; 4];
|
||||
let encoded = ch.encode_utf8(&mut encoded);
|
||||
append_to_buffer(&mut emitter.buffer, encoded.as_bytes());
|
||||
FLUSH(emitter)?;
|
||||
emitter.buffer.push(ch);
|
||||
emitter.column += 1;
|
||||
Ok(())
|
||||
}
|
||||
|
@ -1252,18 +1230,7 @@ unsafe fn yaml_emitter_analyze_event(
|
|||
|
||||
unsafe fn yaml_emitter_write_bom(emitter: &mut yaml_emitter_t) -> Result<(), ()> {
|
||||
FLUSH(emitter)?;
|
||||
let p = &mut emitter.buffer.pointer;
|
||||
let old_pointer = *p;
|
||||
*p = (*p).wrapping_offset(1);
|
||||
*old_pointer = b'\xEF';
|
||||
let p = &mut emitter.buffer.pointer;
|
||||
let old_pointer = *p;
|
||||
*p = (*p).wrapping_offset(1);
|
||||
*old_pointer = b'\xBB';
|
||||
let p = &mut emitter.buffer.pointer;
|
||||
let old_pointer = *p;
|
||||
*p = (*p).wrapping_offset(1);
|
||||
*old_pointer = b'\xBF';
|
||||
emitter.buffer.push('\u{feff}');
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
|
103
src/writer.rs
103
src/writer.rs
|
@ -1,8 +1,7 @@
|
|||
use crate::ops::ForceAdd as _;
|
||||
use crate::yaml::size_t;
|
||||
use crate::yaml_encoding_t::YAML_UTF16BE_ENCODING;
|
||||
use crate::{
|
||||
libc, yaml_emitter_t, PointerExt, YAML_ANY_ENCODING, YAML_UTF16LE_ENCODING, YAML_UTF8_ENCODING,
|
||||
YAML_WRITER_ERROR,
|
||||
yaml_emitter_t, YAML_ANY_ENCODING, YAML_UTF16LE_ENCODING, YAML_UTF8_ENCODING, YAML_WRITER_ERROR,
|
||||
};
|
||||
|
||||
unsafe fn yaml_emitter_set_writer_error(
|
||||
|
@ -18,100 +17,50 @@ unsafe fn yaml_emitter_set_writer_error(
|
|||
pub unsafe fn yaml_emitter_flush(emitter: &mut yaml_emitter_t) -> Result<(), ()> {
|
||||
__assert!((emitter.write_handler).is_some());
|
||||
__assert!(emitter.encoding != YAML_ANY_ENCODING);
|
||||
emitter.buffer.last = emitter.buffer.pointer;
|
||||
emitter.buffer.pointer = emitter.buffer.start;
|
||||
if emitter.buffer.start == emitter.buffer.last {
|
||||
|
||||
if emitter.buffer.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
if emitter.encoding == YAML_UTF8_ENCODING {
|
||||
let to_emit = emitter.buffer.as_bytes();
|
||||
if emitter.write_handler.expect("non-null function pointer")(
|
||||
emitter.write_handler_data,
|
||||
emitter.buffer.start,
|
||||
emitter.buffer.last.c_offset_from(emitter.buffer.start) as size_t,
|
||||
to_emit.as_ptr(),
|
||||
to_emit.len() as size_t,
|
||||
) != 0
|
||||
{
|
||||
emitter.buffer.last = emitter.buffer.start;
|
||||
emitter.buffer.pointer = emitter.buffer.start;
|
||||
emitter.buffer.clear();
|
||||
return Ok(());
|
||||
} else {
|
||||
return yaml_emitter_set_writer_error(emitter, "write error");
|
||||
}
|
||||
}
|
||||
let low: libc::c_int = if emitter.encoding == YAML_UTF16LE_ENCODING {
|
||||
0
|
||||
} else {
|
||||
1
|
||||
|
||||
let big_endian = match emitter.encoding {
|
||||
YAML_ANY_ENCODING | YAML_UTF8_ENCODING => unreachable!("unhandled encoding"),
|
||||
YAML_UTF16LE_ENCODING => false,
|
||||
YAML_UTF16BE_ENCODING => true,
|
||||
};
|
||||
let high: libc::c_int = if emitter.encoding == YAML_UTF16LE_ENCODING {
|
||||
1
|
||||
|
||||
for ch in emitter.buffer.encode_utf16() {
|
||||
let bytes = if big_endian {
|
||||
ch.to_be_bytes()
|
||||
} else {
|
||||
0
|
||||
ch.to_le_bytes()
|
||||
};
|
||||
while emitter.buffer.pointer != emitter.buffer.last {
|
||||
let mut octet: libc::c_uchar;
|
||||
let mut value: libc::c_uint;
|
||||
let mut k: size_t;
|
||||
octet = *emitter.buffer.pointer;
|
||||
let width: libc::c_uint = if octet & 0x80 == 0 {
|
||||
1
|
||||
} else if octet & 0xE0 == 0xC0 {
|
||||
2
|
||||
} else if octet & 0xF0 == 0xE0 {
|
||||
3
|
||||
} else if octet & 0xF8 == 0xF0 {
|
||||
4
|
||||
} else {
|
||||
0
|
||||
} as libc::c_uint;
|
||||
value = if octet & 0x80 == 0 {
|
||||
octet & 0x7F
|
||||
} else if octet & 0xE0 == 0xC0 {
|
||||
octet & 0x1F
|
||||
} else if octet & 0xF0 == 0xE0 {
|
||||
octet & 0xF
|
||||
} else if octet & 0xF8 == 0xF0 {
|
||||
octet & 0x7
|
||||
} else {
|
||||
0
|
||||
} as libc::c_uint;
|
||||
k = 1_u64;
|
||||
while k < width as libc::c_ulong {
|
||||
octet = *emitter.buffer.pointer.wrapping_offset(k as isize);
|
||||
value = (value << 6).force_add((octet & 0x3F) as libc::c_uint);
|
||||
k = k.force_add(1);
|
||||
}
|
||||
emitter.buffer.pointer = emitter.buffer.pointer.wrapping_offset(width as isize);
|
||||
if value < 0x10000 {
|
||||
*emitter.raw_buffer.last.wrapping_offset(high as isize) = (value >> 8) as libc::c_uchar;
|
||||
*emitter.raw_buffer.last.wrapping_offset(low as isize) =
|
||||
(value & 0xFF) as libc::c_uchar;
|
||||
emitter.raw_buffer.last = emitter.raw_buffer.last.wrapping_offset(2_isize);
|
||||
} else {
|
||||
value = value.wrapping_sub(0x10000);
|
||||
*emitter.raw_buffer.last.wrapping_offset(high as isize) =
|
||||
0xD8_u32.force_add(value >> 18) as libc::c_uchar;
|
||||
*emitter.raw_buffer.last.wrapping_offset(low as isize) =
|
||||
(value >> 10 & 0xFF) as libc::c_uchar;
|
||||
*emitter.raw_buffer.last.wrapping_offset((high + 2) as isize) =
|
||||
0xDC_u32.force_add(value >> 8 & 0xFF) as libc::c_uchar;
|
||||
*emitter.raw_buffer.last.wrapping_offset((low + 2) as isize) =
|
||||
(value & 0xFF) as libc::c_uchar;
|
||||
emitter.raw_buffer.last = emitter.raw_buffer.last.wrapping_offset(4_isize);
|
||||
}
|
||||
emitter.raw_buffer.extend(bytes);
|
||||
}
|
||||
|
||||
let to_emit = emitter.raw_buffer.as_slice();
|
||||
|
||||
if emitter.write_handler.expect("non-null function pointer")(
|
||||
emitter.write_handler_data,
|
||||
emitter.raw_buffer.start,
|
||||
emitter
|
||||
.raw_buffer
|
||||
.last
|
||||
.c_offset_from(emitter.raw_buffer.start) as size_t,
|
||||
to_emit.as_ptr(),
|
||||
to_emit.len() as size_t,
|
||||
) != 0
|
||||
{
|
||||
emitter.buffer.last = emitter.buffer.start;
|
||||
emitter.buffer.pointer = emitter.buffer.start;
|
||||
emitter.raw_buffer.last = emitter.raw_buffer.start;
|
||||
emitter.raw_buffer.pointer = emitter.raw_buffer.start;
|
||||
emitter.buffer.clear();
|
||||
emitter.raw_buffer.clear();
|
||||
Ok(())
|
||||
} else {
|
||||
yaml_emitter_set_writer_error(emitter, "write error")
|
||||
|
|
16
src/yaml.rs
16
src/yaml.rs
|
@ -715,10 +715,15 @@ pub struct yaml_parser_t {
|
|||
/// EOF flag
|
||||
pub(crate) eof: bool,
|
||||
/// The working buffer.
|
||||
///
|
||||
/// This always contains valid UTF-8.
|
||||
pub(crate) buffer: yaml_buffer_t<yaml_char_t>,
|
||||
/// The number of unread characters in the buffer.
|
||||
pub(crate) unread: size_t,
|
||||
/// The raw buffer.
|
||||
///
|
||||
/// This is the raw unchecked input from the read handler (for example, it
|
||||
/// may be UTF-16 encoded).
|
||||
pub(crate) raw_buffer: yaml_buffer_t<libc::c_uchar>,
|
||||
/// The input encoding.
|
||||
pub(crate) encoding: yaml_encoding_t,
|
||||
|
@ -856,7 +861,7 @@ impl Default for unnamed_yaml_parser_t_input_string {
|
|||
/// On success, the handler should return 1. If the handler failed, the returned
|
||||
/// value should be 0.
|
||||
pub type yaml_write_handler_t =
|
||||
unsafe fn(data: *mut libc::c_void, buffer: *mut libc::c_uchar, size: size_t) -> libc::c_int;
|
||||
unsafe fn(data: *mut libc::c_void, buffer: *const libc::c_uchar, size: size_t) -> libc::c_int;
|
||||
|
||||
/// The emitter states.
|
||||
#[derive(Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
|
||||
|
@ -931,9 +936,14 @@ pub struct yaml_emitter_t {
|
|||
/// Standard (string or file) output data.
|
||||
pub(crate) output: unnamed_yaml_emitter_t_output_string,
|
||||
/// The working buffer.
|
||||
pub(crate) buffer: yaml_buffer_t<yaml_char_t>,
|
||||
///
|
||||
/// This always contains valid UTF-8.
|
||||
pub(crate) buffer: String,
|
||||
/// The raw buffer.
|
||||
pub(crate) raw_buffer: yaml_buffer_t<libc::c_uchar>,
|
||||
///
|
||||
/// This contains the output in the encoded format, so for example it may be
|
||||
/// UTF-16 encoded.
|
||||
pub(crate) raw_buffer: Vec<u8>,
|
||||
/// The stream encoding.
|
||||
pub(crate) encoding: yaml_encoding_t,
|
||||
/// If the output is in the canonical style?
|
||||
|
|
Loading…
Reference in a new issue