mirror of
https://github.com/Serial-ATA/lofty-rs
synced 2025-03-04 14:57:17 +00:00
ID3v2: Support UTF-16 TIPL frames with single BOM
This changes the signature of `decode_text` to take the new `TextDecodeOptions` struct, which allows a UTF-16 BOM to be specified ahead of time. Closes #306.
This commit is contained in:
parent
7e93b3c1f7
commit
3c33cb1f43
17 changed files with 216 additions and 69 deletions
|
@ -2,7 +2,7 @@ use crate::error::{Id3v2Error, Id3v2ErrorKind, Result};
|
|||
use crate::id3::v2::header::Id3v2Version;
|
||||
use crate::macros::err;
|
||||
use crate::picture::{MimeType, Picture, PictureType};
|
||||
use crate::util::text::{encode_text, TextEncoding};
|
||||
use crate::util::text::{encode_text, TextDecodeOptions, TextEncoding};
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::io::{Read, Write as _};
|
||||
|
@ -58,16 +58,24 @@ impl AttachedPictureFrame {
|
|||
},
|
||||
}
|
||||
} else {
|
||||
let mime_type_str =
|
||||
crate::util::text::decode_text(reader, TextEncoding::UTF8, true)?.text_or_none();
|
||||
let mime_type_str = crate::util::text::decode_text(
|
||||
reader,
|
||||
TextDecodeOptions::new()
|
||||
.encoding(TextEncoding::Latin1)
|
||||
.terminated(true),
|
||||
)?
|
||||
.text_or_none();
|
||||
mime_type = mime_type_str.map(|mime_type_str| MimeType::from_str(&mime_type_str));
|
||||
};
|
||||
|
||||
let pic_type = PictureType::from_u8(reader.read_u8()?);
|
||||
|
||||
let description = crate::util::text::decode_text(reader, encoding, true)?
|
||||
.text_or_none()
|
||||
.map(Cow::from);
|
||||
let description = crate::util::text::decode_text(
|
||||
reader,
|
||||
TextDecodeOptions::new().encoding(encoding).terminated(true),
|
||||
)?
|
||||
.text_or_none()
|
||||
.map(Cow::from);
|
||||
|
||||
let mut data = Vec::new();
|
||||
reader.read_to_end(&mut data)?;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
use crate::error::{ErrorKind, Id3v2Error, Id3v2ErrorKind, LoftyError, Result};
|
||||
use crate::util::text::{decode_text, encode_text, TextEncoding};
|
||||
use crate::util::text::{decode_text, encode_text, TextDecodeOptions, TextEncoding};
|
||||
|
||||
use std::hash::{Hash, Hasher};
|
||||
|
||||
|
@ -96,11 +96,21 @@ impl AudioTextFrame {
|
|||
let encoding = TextEncoding::from_u8(content.read_u8()?)
|
||||
.ok_or_else(|| LoftyError::new(ErrorKind::TextDecode("Found invalid encoding")))?;
|
||||
|
||||
let mime_type = decode_text(content, TextEncoding::Latin1, true)?.content;
|
||||
let mime_type = decode_text(
|
||||
content,
|
||||
TextDecodeOptions::new()
|
||||
.encoding(TextEncoding::Latin1)
|
||||
.terminated(true),
|
||||
)?
|
||||
.content;
|
||||
|
||||
let flags = AudioTextFrameFlags::from_u8(content.read_u8()?);
|
||||
|
||||
let equivalent_text = decode_text(content, encoding, true)?.content;
|
||||
let equivalent_text = decode_text(
|
||||
content,
|
||||
TextDecodeOptions::new().encoding(encoding).terminated(true),
|
||||
)?
|
||||
.content;
|
||||
|
||||
Ok(Self {
|
||||
encoding,
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
use crate::error::{ErrorKind, Id3v2Error, Id3v2ErrorKind, LoftyError, Result};
|
||||
use crate::util::text::{decode_text, encode_text, TextEncoding};
|
||||
use crate::util::text::{decode_text, encode_text, TextDecodeOptions, TextEncoding};
|
||||
|
||||
use std::io::{Cursor, Read};
|
||||
|
||||
|
@ -36,9 +36,17 @@ impl GeneralEncapsulatedObject {
|
|||
|
||||
let mut cursor = Cursor::new(&data[1..]);
|
||||
|
||||
let mime_type = decode_text(&mut cursor, TextEncoding::Latin1, true)?;
|
||||
let file_name = decode_text(&mut cursor, encoding, true)?;
|
||||
let descriptor = decode_text(&mut cursor, encoding, true)?;
|
||||
let mime_type = decode_text(
|
||||
&mut cursor,
|
||||
TextDecodeOptions::new()
|
||||
.encoding(TextEncoding::Latin1)
|
||||
.terminated(true),
|
||||
)?;
|
||||
|
||||
let text_decode_options = TextDecodeOptions::new().encoding(encoding).terminated(true);
|
||||
|
||||
let file_name = decode_text(&mut cursor, text_decode_options)?;
|
||||
let descriptor = decode_text(&mut cursor, text_decode_options)?;
|
||||
|
||||
let mut data = Vec::new();
|
||||
cursor.read_to_end(&mut data)?;
|
||||
|
|
|
@ -1,7 +1,9 @@
|
|||
use crate::error::{Id3v2Error, Id3v2ErrorKind, LoftyError, Result};
|
||||
use crate::id3::v2::frame::content::verify_encoding;
|
||||
use crate::id3::v2::header::Id3v2Version;
|
||||
use crate::util::text::{decode_text, encode_text, utf16_decode_bytes, TextEncoding};
|
||||
use crate::util::text::{
|
||||
decode_text, encode_text, utf16_decode_bytes, TextDecodeOptions, TextEncoding,
|
||||
};
|
||||
|
||||
use std::hash::{Hash, Hasher};
|
||||
use std::io::Read;
|
||||
|
@ -57,11 +59,15 @@ impl ExtendedTextFrame {
|
|||
};
|
||||
|
||||
let encoding = verify_encoding(encoding_byte, version)?;
|
||||
let description = decode_text(reader, encoding, true)?;
|
||||
let description = decode_text(
|
||||
reader,
|
||||
TextDecodeOptions::new().encoding(encoding).terminated(true),
|
||||
)?;
|
||||
|
||||
let frame_content;
|
||||
if encoding != TextEncoding::UTF16 {
|
||||
frame_content = decode_text(reader, encoding, false)?.content;
|
||||
frame_content =
|
||||
decode_text(reader, TextDecodeOptions::new().encoding(encoding))?.content;
|
||||
|
||||
return Ok(Some(ExtendedTextFrame {
|
||||
encoding,
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
use crate::error::Result;
|
||||
use crate::id3::v2::frame::content::verify_encoding;
|
||||
use crate::id3::v2::header::Id3v2Version;
|
||||
use crate::util::text::{decode_text, encode_text, TextEncoding};
|
||||
use crate::util::text::{decode_text, encode_text, TextDecodeOptions, TextEncoding};
|
||||
|
||||
use std::hash::{Hash, Hasher};
|
||||
use std::io::Read;
|
||||
|
@ -57,8 +57,16 @@ impl ExtendedUrlFrame {
|
|||
};
|
||||
|
||||
let encoding = verify_encoding(encoding_byte, version)?;
|
||||
let description = decode_text(reader, encoding, true)?.content;
|
||||
let content = decode_text(reader, TextEncoding::Latin1, false)?.content;
|
||||
let description = decode_text(
|
||||
reader,
|
||||
TextDecodeOptions::new().encoding(encoding).terminated(true),
|
||||
)?
|
||||
.content;
|
||||
let content = decode_text(
|
||||
reader,
|
||||
TextDecodeOptions::new().encoding(TextEncoding::Latin1),
|
||||
)?
|
||||
.content;
|
||||
|
||||
Ok(Some(ExtendedUrlFrame {
|
||||
encoding,
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
use crate::error::Result;
|
||||
use crate::id3::v2::frame::content::verify_encoding;
|
||||
use crate::id3::v2::header::Id3v2Version;
|
||||
use crate::util::text::{decode_text, encode_text, TextEncoding};
|
||||
use crate::util::text::{decode_text, encode_text, TextDecodeOptions, TextEncoding};
|
||||
|
||||
use byteorder::ReadBytesExt;
|
||||
|
||||
|
@ -40,9 +40,28 @@ impl KeyValueFrame {
|
|||
|
||||
let mut values = Vec::new();
|
||||
|
||||
let mut text_decode_options = TextDecodeOptions::new().encoding(encoding).terminated(true);
|
||||
|
||||
// We have to read the first key/value pair separately because it may be the only string with a BOM
|
||||
|
||||
let first_key = decode_text(reader, text_decode_options)?;
|
||||
|
||||
if first_key.bytes_read == 0 {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
if encoding == TextEncoding::UTF16 {
|
||||
text_decode_options = text_decode_options.bom(first_key.bom);
|
||||
}
|
||||
|
||||
values.push((
|
||||
first_key.content,
|
||||
decode_text(reader, text_decode_options)?.content,
|
||||
));
|
||||
|
||||
loop {
|
||||
let key = decode_text(reader, encoding, true)?;
|
||||
let value = decode_text(reader, encoding, true)?;
|
||||
let key = decode_text(reader, text_decode_options)?;
|
||||
let value = decode_text(reader, text_decode_options)?;
|
||||
if key.bytes_read == 0 || value.bytes_read == 0 {
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
use crate::error::{Id3v2Error, Id3v2ErrorKind, Result};
|
||||
use crate::id3::v2::frame::content::verify_encoding;
|
||||
use crate::id3::v2::header::Id3v2Version;
|
||||
use crate::util::text::{decode_text, encode_text, TextEncoding};
|
||||
use crate::util::text::{decode_text, encode_text, TextDecodeOptions, TextEncoding};
|
||||
|
||||
use std::hash::{Hash, Hasher};
|
||||
use std::io::Read;
|
||||
|
@ -32,8 +32,12 @@ impl LanguageFrame {
|
|||
let mut language = [0; 3];
|
||||
reader.read_exact(&mut language)?;
|
||||
|
||||
let description = decode_text(reader, encoding, true)?.content;
|
||||
let content = decode_text(reader, encoding, false)?.content;
|
||||
let description = decode_text(
|
||||
reader,
|
||||
TextDecodeOptions::new().encoding(encoding).terminated(true),
|
||||
)?
|
||||
.content;
|
||||
let content = decode_text(reader, TextDecodeOptions::new().encoding(encoding))?.content;
|
||||
|
||||
Ok(Some(Self {
|
||||
encoding,
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
use crate::error::{ErrorKind, Id3v2Error, Id3v2ErrorKind, LoftyError, Result};
|
||||
use crate::util::text::{decode_text, encode_text, utf8_decode_str, TextEncoding};
|
||||
use crate::util::text::{
|
||||
decode_text, encode_text, utf8_decode_str, TextDecodeOptions, TextEncoding,
|
||||
};
|
||||
|
||||
use std::hash::Hash;
|
||||
use std::io::Read;
|
||||
|
@ -45,14 +47,20 @@ impl OwnershipFrame {
|
|||
|
||||
let encoding = TextEncoding::from_u8(encoding_byte)
|
||||
.ok_or_else(|| LoftyError::new(ErrorKind::TextDecode("Found invalid encoding")))?;
|
||||
let price_paid = decode_text(reader, TextEncoding::Latin1, true)?.content;
|
||||
let price_paid = decode_text(
|
||||
reader,
|
||||
TextDecodeOptions::new()
|
||||
.encoding(TextEncoding::Latin1)
|
||||
.terminated(true),
|
||||
)?
|
||||
.content;
|
||||
|
||||
let mut date_bytes = [0u8; 8];
|
||||
reader.read_exact(&mut date_bytes)?;
|
||||
|
||||
let date_of_purchase = utf8_decode_str(&date_bytes)?.to_owned();
|
||||
|
||||
let seller = decode_text(reader, encoding, false)?.content;
|
||||
let seller = decode_text(reader, TextDecodeOptions::new().encoding(encoding))?.content;
|
||||
|
||||
Ok(Some(OwnershipFrame {
|
||||
encoding,
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
use crate::error::Result;
|
||||
use crate::util::text::{decode_text, encode_text, TextEncoding};
|
||||
use crate::util::text::{decode_text, encode_text, TextDecodeOptions, TextEncoding};
|
||||
|
||||
use std::hash::{Hash, Hasher};
|
||||
use std::io::Read;
|
||||
|
@ -36,7 +36,12 @@ impl Popularimeter {
|
|||
where
|
||||
R: Read,
|
||||
{
|
||||
let email = decode_text(reader, TextEncoding::Latin1, true)?;
|
||||
let email = decode_text(
|
||||
reader,
|
||||
TextDecodeOptions::new()
|
||||
.encoding(TextEncoding::Latin1)
|
||||
.terminated(true),
|
||||
)?;
|
||||
let rating = reader.read_u8()?;
|
||||
|
||||
let mut counter_content = Vec::new();
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
use crate::error::Result;
|
||||
use crate::util::text::{decode_text, encode_text, TextEncoding};
|
||||
use crate::util::text::{decode_text, encode_text, TextDecodeOptions, TextEncoding};
|
||||
|
||||
use std::io::Read;
|
||||
|
||||
|
@ -28,7 +28,12 @@ impl PrivateFrame {
|
|||
where
|
||||
R: Read,
|
||||
{
|
||||
let Ok(owner) = decode_text(reader, TextEncoding::Latin1, true) else {
|
||||
let Ok(owner) = decode_text(
|
||||
reader,
|
||||
TextDecodeOptions::new()
|
||||
.encoding(TextEncoding::Latin1)
|
||||
.terminated(true),
|
||||
) else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
use crate::error::{Id3v2Error, Id3v2ErrorKind, Result};
|
||||
use crate::macros::try_vec;
|
||||
use crate::probe::ParsingMode;
|
||||
use crate::util::text::{decode_text, encode_text, TextEncoding};
|
||||
use crate::util::text::{decode_text, encode_text, TextDecodeOptions, TextEncoding};
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::hash::{Hash, Hasher};
|
||||
|
@ -110,7 +110,13 @@ impl RelativeVolumeAdjustmentFrame {
|
|||
where
|
||||
R: Read,
|
||||
{
|
||||
let identification = decode_text(reader, TextEncoding::Latin1, true)?.content;
|
||||
let identification = decode_text(
|
||||
reader,
|
||||
TextDecodeOptions::new()
|
||||
.encoding(TextEncoding::Latin1)
|
||||
.terminated(true),
|
||||
)?
|
||||
.content;
|
||||
|
||||
let mut channels = HashMap::new();
|
||||
while let Ok(channel_type_byte) = reader.read_u8() {
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
use crate::error::{ErrorKind, Id3v2Error, Id3v2ErrorKind, LoftyError, Result};
|
||||
use crate::macros::err;
|
||||
use crate::util::text::{
|
||||
decode_text, encode_text, read_to_terminator, utf16_decode_bytes, TextEncoding,
|
||||
decode_text, encode_text, read_to_terminator, utf16_decode_bytes, TextDecodeOptions,
|
||||
TextEncoding,
|
||||
};
|
||||
|
||||
use std::io::{Cursor, Read, Seek, SeekFrom, Write};
|
||||
|
@ -106,9 +107,12 @@ impl SynchronizedText {
|
|||
.ok_or_else(|| Id3v2Error::new(Id3v2ErrorKind::BadSyncText))?;
|
||||
|
||||
let mut cursor = Cursor::new(&data[6..]);
|
||||
let description = crate::util::text::decode_text(&mut cursor, encoding, true)
|
||||
.map_err(|_| Id3v2Error::new(Id3v2ErrorKind::BadSyncText))?
|
||||
.text_or_none();
|
||||
let description = crate::util::text::decode_text(
|
||||
&mut cursor,
|
||||
TextDecodeOptions::new().encoding(encoding).terminated(true),
|
||||
)
|
||||
.map_err(|_| Id3v2Error::new(Id3v2ErrorKind::BadSyncText))?
|
||||
.text_or_none();
|
||||
|
||||
let mut endianness: fn([u8; 2]) -> u16 = u16::from_le_bytes;
|
||||
|
||||
|
@ -154,8 +158,11 @@ impl SynchronizedText {
|
|||
}
|
||||
}
|
||||
|
||||
let decoded_text = decode_text(&mut cursor, encoding, true)
|
||||
.map_err(|_| Id3v2Error::new(Id3v2ErrorKind::BadSyncText))?;
|
||||
let decoded_text = decode_text(
|
||||
&mut cursor,
|
||||
TextDecodeOptions::new().encoding(encoding).terminated(true),
|
||||
)
|
||||
.map_err(|_| Id3v2Error::new(Id3v2ErrorKind::BadSyncText))?;
|
||||
pos += decoded_text.bytes_read as u64;
|
||||
|
||||
Ok(decoded_text.content)
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
use crate::error::Result;
|
||||
use crate::id3::v2::frame::content::verify_encoding;
|
||||
use crate::id3::v2::header::Id3v2Version;
|
||||
use crate::util::text::{decode_text, encode_text, TextEncoding};
|
||||
use crate::util::text::{decode_text, encode_text, TextDecodeOptions, TextEncoding};
|
||||
|
||||
use byteorder::ReadBytesExt;
|
||||
|
||||
|
@ -37,7 +37,7 @@ impl TextInformationFrame {
|
|||
};
|
||||
|
||||
let encoding = verify_encoding(encoding_byte, version)?;
|
||||
let value = decode_text(reader, encoding, false)?.content;
|
||||
let value = decode_text(reader, TextDecodeOptions::new().encoding(encoding))?.content;
|
||||
|
||||
Ok(Some(TextInformationFrame { encoding, value }))
|
||||
}
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
use crate::error::{Id3v2Error, Id3v2ErrorKind, Result};
|
||||
use crate::macros::parse_mode_choice;
|
||||
use crate::probe::ParsingMode;
|
||||
use crate::util::text::{decode_text, encode_text, TextEncoding};
|
||||
use crate::util::text::{decode_text, encode_text, TextDecodeOptions, TextEncoding};
|
||||
|
||||
use std::hash::{Hash, Hasher};
|
||||
use std::io::Read;
|
||||
|
@ -25,7 +25,12 @@ impl UniqueFileIdentifierFrame {
|
|||
where
|
||||
R: Read,
|
||||
{
|
||||
let owner_decode_result = decode_text(reader, TextEncoding::Latin1, true)?;
|
||||
let owner_decode_result = decode_text(
|
||||
reader,
|
||||
TextDecodeOptions::new()
|
||||
.encoding(TextEncoding::Latin1)
|
||||
.terminated(true),
|
||||
)?;
|
||||
|
||||
let owner;
|
||||
match owner_decode_result.text_or_none() {
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
use crate::error::Result;
|
||||
use crate::util::text::{decode_text, encode_text, TextEncoding};
|
||||
use crate::util::text::{decode_text, encode_text, TextDecodeOptions, TextEncoding};
|
||||
|
||||
use std::io::Read;
|
||||
|
||||
|
@ -19,7 +19,12 @@ impl UrlLinkFrame {
|
|||
where
|
||||
R: Read,
|
||||
{
|
||||
let url = decode_text(reader, TextEncoding::Latin1, true)?;
|
||||
let url = decode_text(
|
||||
reader,
|
||||
TextDecodeOptions::new()
|
||||
.encoding(TextEncoding::Latin1)
|
||||
.terminated(true),
|
||||
)?;
|
||||
if url.bytes_read == 0 {
|
||||
return Ok(None);
|
||||
}
|
||||
|
|
|
@ -17,7 +17,7 @@ use crate::picture::{Picture, PictureType, TOMBSTONE_PICTURE};
|
|||
use crate::tag::item::{ItemKey, ItemValue, TagItem};
|
||||
use crate::tag::{try_parse_year, Tag, TagType};
|
||||
use crate::traits::{Accessor, MergeTag, SplitTag, TagExt};
|
||||
use crate::util::text::{decode_text, TextEncoding};
|
||||
use crate::util::text::{decode_text, TextDecodeOptions, TextEncoding};
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::convert::TryInto;
|
||||
|
@ -1084,9 +1084,10 @@ impl SplitTag for Id3v2Tag {
|
|||
) => {
|
||||
if owner == MUSICBRAINZ_UFID_OWNER {
|
||||
let mut identifier = Cursor::new(identifier);
|
||||
let Ok(recording_id) =
|
||||
decode_text(&mut identifier, TextEncoding::Latin1, false)
|
||||
else {
|
||||
let Ok(recording_id) = decode_text(
|
||||
&mut identifier,
|
||||
TextDecodeOptions::new().encoding(TextEncoding::Latin1),
|
||||
) else {
|
||||
return true; // Keep frame
|
||||
};
|
||||
tag.items.push(TagItem::new(
|
||||
|
|
|
@ -60,20 +60,54 @@ const EMPTY_DECODED_TEXT: DecodeTextResult = DecodeTextResult {
|
|||
bom: [0, 0],
|
||||
};
|
||||
|
||||
pub(crate) fn decode_text<R>(
|
||||
reader: &mut R,
|
||||
encoding: TextEncoding,
|
||||
terminated: bool,
|
||||
) -> Result<DecodeTextResult>
|
||||
/// Options passed to `decode_text` describing how a string should be read and decoded.
///
/// Values are built by chaining the setter methods on top of `TextDecodeOptions::new()`.
/// The struct is `Copy`, so a single value can be handed to several `decode_text` calls.
#[derive(Copy, Clone, Debug)]
|
||||
pub(crate) struct TextDecodeOptions {
|
||||
/// The text encoding used to interpret the bytes (defaults to `TextEncoding::UTF8`).
pub encoding: TextEncoding,
|
||||
/// Whether `decode_text` should read up to a null terminator for `encoding`
/// (via `read_to_terminator`). Defaults to `false`.
pub terminated: bool,
|
||||
/// A UTF-16 byte order mark known ahead of time.
///
/// `[0, 0]` (the default) means "none supplied": `decode_text` then takes the BOM from
/// the first two bytes of the string itself. Any other value is used in place of those bytes.
// NOTE(review): the visible UTF-16 branch of `decode_text` decodes `&raw_bytes[2..]` in both
// match arms, even when the BOM comes from this field - confirm that a string without its own
// BOM does not lose its first code unit.
pub bom: [u8; 2],
|
||||
}
|
||||
|
||||
// Builder-style API: every setter takes `self` by value and returns the updated copy,
// so calls can be chained, e.g. `TextDecodeOptions::new().encoding(e).terminated(true)`.
impl TextDecodeOptions {
|
||||
/// Creates the default options (see the `Default` impl for the exact values).
pub(crate) fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
/// Sets the text encoding used to decode the string.
pub(crate) fn encoding(mut self, encoding: TextEncoding) -> Self {
|
||||
self.encoding = encoding;
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets whether the string is expected to end with a null terminator.
pub(crate) fn terminated(mut self, terminated: bool) -> Self {
|
||||
self.terminated = terminated;
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets a UTF-16 byte order mark to use instead of one read from the string.
///
/// `[0, 0]` means that no BOM is supplied.
pub(crate) fn bom(mut self, bom: [u8; 2]) -> Self {
|
||||
self.bom = bom;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for TextDecodeOptions {
|
||||
/// Returns options for a UTF-8 string that is not null-terminated, with no BOM supplied.
fn default() -> Self {
|
||||
Self {
|
||||
encoding: TextEncoding::UTF8,
|
||||
terminated: false,
|
||||
// `[0, 0]` is the "no BOM supplied" sentinel checked by `decode_text`.
bom: [0, 0],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn decode_text<R>(reader: &mut R, options: TextDecodeOptions) -> Result<DecodeTextResult>
|
||||
where
|
||||
R: Read,
|
||||
{
|
||||
let raw_bytes;
|
||||
let bytes_read;
|
||||
|
||||
if terminated {
|
||||
if let Some(bytes) = read_to_terminator(reader, encoding) {
|
||||
let null_terminator_length = match encoding {
|
||||
if options.terminated {
|
||||
if let Some(bytes) = read_to_terminator(reader, options.encoding) {
|
||||
let null_terminator_length = match options.encoding {
|
||||
TextEncoding::Latin1 | TextEncoding::UTF8 => 1,
|
||||
TextEncoding::UTF16 | TextEncoding::UTF16BE => 2,
|
||||
};
|
||||
|
@ -96,7 +130,7 @@ where
|
|||
}
|
||||
|
||||
let mut bom = [0, 0];
|
||||
let read_string = match encoding {
|
||||
let read_string = match options.encoding {
|
||||
TextEncoding::Latin1 => latin1_decode(&raw_bytes),
|
||||
TextEncoding::UTF16 => {
|
||||
if raw_bytes.len() < 2 {
|
||||
|
@ -107,12 +141,19 @@ where
|
|||
err!(TextDecode("UTF-16 string has an odd length"));
|
||||
}
|
||||
|
||||
match (raw_bytes[0], raw_bytes[1]) {
|
||||
(0xFE, 0xFF) => {
|
||||
let bom_to_check;
|
||||
if options.bom == [0, 0] {
|
||||
bom_to_check = [raw_bytes[0], raw_bytes[1]];
|
||||
} else {
|
||||
bom_to_check = options.bom;
|
||||
}
|
||||
|
||||
match bom_to_check {
|
||||
[0xFE, 0xFF] => {
|
||||
bom = [0xFE, 0xFF];
|
||||
utf16_decode_bytes(&raw_bytes[2..], u16::from_be_bytes)?
|
||||
},
|
||||
(0xFF, 0xFE) => {
|
||||
[0xFF, 0xFE] => {
|
||||
bom = [0xFF, 0xFE];
|
||||
utf16_decode_bytes(&raw_bytes[2..], u16::from_le_bytes)?
|
||||
},
|
||||
|
@ -280,7 +321,7 @@ fn utf16_encode(
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::util::text::TextEncoding;
|
||||
use crate::util::text::{TextDecodeOptions, TextEncoding};
|
||||
use std::io::Cursor;
|
||||
|
||||
const TEST_STRING: &str = "l\u{00f8}ft\u{00a5}";
|
||||
|
@ -303,16 +344,14 @@ mod tests {
|
|||
&mut Cursor::new(&[
|
||||
0xFE, 0xFF, 0x00, 0x6C, 0x00, 0xF8, 0x00, 0x66, 0x00, 0x74, 0x00, 0xA5, 0x00, 0x00,
|
||||
]),
|
||||
TextEncoding::UTF16,
|
||||
false,
|
||||
TextDecodeOptions::new().encoding(TextEncoding::UTF16),
|
||||
)
|
||||
.unwrap();
|
||||
let le_utf16_decode = super::decode_text(
|
||||
&mut Cursor::new(&[
|
||||
0xFF, 0xFE, 0x6C, 0x00, 0xF8, 0x00, 0x66, 0x00, 0x74, 0x00, 0xA5, 0x00, 0x00, 0x00,
|
||||
]),
|
||||
TextEncoding::UTF16,
|
||||
false,
|
||||
TextDecodeOptions::new().encoding(TextEncoding::UTF16),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
|
@ -320,8 +359,11 @@ mod tests {
|
|||
assert_eq!(be_utf16_decode.bytes_read, le_utf16_decode.bytes_read);
|
||||
assert_eq!(be_utf16_decode.content, TEST_STRING.to_string());
|
||||
|
||||
let utf8_decode =
|
||||
super::decode_text(&mut TEST_STRING.as_bytes(), TextEncoding::UTF8, false).unwrap();
|
||||
let utf8_decode = super::decode_text(
|
||||
&mut TEST_STRING.as_bytes(),
|
||||
TextDecodeOptions::new().encoding(TextEncoding::UTF8),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(utf8_decode.content, TEST_STRING.to_string());
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue