Add support for utf16 files

This commit is contained in:
Jonathan Turner 2019-08-12 16:11:42 +12:00
parent 7c4a4ec62e
commit 6cf3dc92fc
6 changed files with 149 additions and 23 deletions

6
Cargo.lock generated
View file

@ -1979,7 +1979,7 @@ dependencies = [
"regex 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"reqwest 0.9.19 (registry+https://github.com/rust-lang/crates.io-index)",
"roxmltree 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
"rustyline 5.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
"rustyline 5.0.1 (git+https://github.com/kkawakam/rustyline.git)",
"semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.98 (registry+https://github.com/rust-lang/crates.io-index)",
"serde-hjson 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)",
@ -2790,7 +2790,7 @@ dependencies = [
[[package]]
name = "rustyline"
version = "5.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "git+https://github.com/kkawakam/rustyline.git#568c9d0512b065e9eef68a6e46407881d2376738"
dependencies = [
"dirs 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.60 (registry+https://github.com/rust-lang/crates.io-index)",
@ -4036,7 +4036,7 @@ dependencies = [
"checksum rustc-demangle 0.1.15 (registry+https://github.com/rust-lang/crates.io-index)" = "a7f4dccf6f4891ebcc0c39f9b6eb1a83b9bf5d747cb439ec6fba4f3b977038af"
"checksum rustc-hash 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7540fc8b0c49f096ee9c961cda096467dce8084bec6bdca2fc83895fd9b28cb8"
"checksum rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a"
"checksum rustyline 5.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b7d4ca3c9586d2c1f742284f032e328313ea55f3f60a3b0a17e2ca1a2bf9ae22"
"checksum rustyline 5.0.1 (git+https://github.com/kkawakam/rustyline.git)" = "<none>"
"checksum ryu 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c92464b447c0ee8c4fb3824ecc8383b81717b9f1e74ba2e72540aef7b9f82997"
"checksum safemem 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "e133ccc4f4d1cd4f89cc8a7ff618287d56dc7f638b8e38fc32c5fdcadc339dd5"
"checksum same-file 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "585e8ddcedc187886a30fa705c47985c3fa88d06624095856b36ca0b82ff4421"

View file

@ -394,6 +394,10 @@ mod tests {
Res {
loc: fixtures().join("sgml_description.json"),
at: 0
},
Res {
loc: fixtures().join("utf16.ini"),
at: 0
}
]
);

View file

@ -33,11 +33,17 @@ fn get_member(path: &Tagged<String>, obj: &Tagged<Value>) -> Result<Tagged<Value
match current.get_data_by_key(p) {
Some(v) => current = v,
None => {
return Err(ShellError::labeled_error(
"Unknown field",
"object missing field",
path.span(),
));
// Before we give up, see if they gave us a path that matches a field name by itself
match obj.get_data_by_key(&path.item) {
Some(v) => return Ok(v.clone()),
None => {
return Err(ShellError::labeled_error(
"Unknown field",
"object missing field",
path.span(),
));
}
}
}
}
}
@ -49,11 +55,6 @@ pub fn get(
GetArgs { rest: fields }: GetArgs,
RunnableContext { input, .. }: RunnableContext,
) -> Result<OutputStream, ShellError> {
// If it's a number, get the row instead of the column
// if let Some(amount) = amount {
// return Ok(input.values.skip(amount as u64).take(1).from_input_stream());
// }
let stream = input
.values
.map(move |item| {

View file

@ -9,7 +9,6 @@ use mime::Mime;
use std::path::{Path, PathBuf};
use std::str::FromStr;
use uuid::Uuid;
pub struct Open;
#[derive(Deserialize)]
@ -239,15 +238,96 @@ pub fn fetch(
},
SpanSource::File(cwd.to_string_lossy().to_string()),
)),
Err(_) => Ok((
None,
Value::Binary(bytes),
Tag {
span,
origin: Some(Uuid::new_v4()),
},
SpanSource::File(cwd.to_string_lossy().to_string()),
)),
Err(_) => {
//Non utf8 data.
match (bytes.get(0), bytes.get(1)) {
(Some(x), Some(y)) if *x == 0xff && *y == 0xfe => {
// Possibly UTF-16 little endian
let utf16 = read_le_u16(&bytes[2..]);
if let Some(utf16) = utf16 {
match std::string::String::from_utf16(&utf16) {
Ok(s) => Ok((
cwd.extension()
.map(|name| name.to_string_lossy().to_string()),
Value::string(s),
Tag {
span,
origin: Some(Uuid::new_v4()),
},
SpanSource::File(cwd.to_string_lossy().to_string()),
)),
Err(_) => Ok((
None,
Value::Binary(bytes),
Tag {
span,
origin: Some(Uuid::new_v4()),
},
SpanSource::File(cwd.to_string_lossy().to_string()),
)),
}
} else {
Ok((
None,
Value::Binary(bytes),
Tag {
span,
origin: Some(Uuid::new_v4()),
},
SpanSource::File(cwd.to_string_lossy().to_string()),
))
}
}
(Some(x), Some(y)) if *x == 0xfe && *y == 0xff => {
// Possibly UTF-16 big endian
let utf16 = read_be_u16(&bytes[2..]);
if let Some(utf16) = utf16 {
match std::string::String::from_utf16(&utf16) {
Ok(s) => Ok((
cwd.extension()
.map(|name| name.to_string_lossy().to_string()),
Value::string(s),
Tag {
span,
origin: Some(Uuid::new_v4()),
},
SpanSource::File(cwd.to_string_lossy().to_string()),
)),
Err(_) => Ok((
None,
Value::Binary(bytes),
Tag {
span,
origin: Some(Uuid::new_v4()),
},
SpanSource::File(cwd.to_string_lossy().to_string()),
)),
}
} else {
Ok((
None,
Value::Binary(bytes),
Tag {
span,
origin: Some(Uuid::new_v4()),
},
SpanSource::File(cwd.to_string_lossy().to_string()),
))
}
}
_ => Ok((
None,
Value::Binary(bytes),
Tag {
span,
origin: Some(Uuid::new_v4()),
},
SpanSource::File(cwd.to_string_lossy().to_string()),
)),
}
}
},
Err(_) => {
return Err(ShellError::labeled_error(
@ -267,6 +347,36 @@ pub fn fetch(
}
}
/// Reinterprets a byte slice as a sequence of little-endian 16-bit units.
///
/// Each consecutive pair of bytes becomes one `u16`, low byte first.
/// Returns `None` when `input` is empty or contains an odd number of bytes.
fn read_le_u16(input: &[u8]) -> Option<Vec<u16>> {
    // Reject anything that cannot be split into at least one whole pair.
    if input.is_empty() || input.len() % 2 == 1 {
        return None;
    }

    let units: Vec<u16> = input
        .chunks_exact(2)
        .map(|pair| u16::from_le_bytes([pair[0], pair[1]]))
        .collect();
    Some(units)
}
/// Reinterprets a byte slice as a sequence of big-endian 16-bit units.
///
/// Each consecutive pair of bytes becomes one `u16`, high byte first.
/// Returns `None` when `input` is empty or contains an odd number of bytes.
fn read_be_u16(input: &[u8]) -> Option<Vec<u16>> {
    // Reject anything that cannot be split into at least one whole pair.
    if input.is_empty() || input.len() % 2 == 1 {
        return None;
    }

    let units: Vec<u16> = input
        .chunks_exact(2)
        .map(|pair| u16::from_be_bytes([pair[0], pair[1]]))
        .collect();
    Some(units)
}
pub fn parse_as_value(
extension: Option<String>,
contents: String,

View file

@ -71,6 +71,17 @@ fn open_can_parse_ini() {
assert_eq!(output, "1234")
}
// Opens the `utf16.ini` fixture and reads a nested key through chained `get`
// calls, checking that the value comes back as the expected text.
#[test]
fn open_can_parse_utf16_ini() {
    let expected = "-236";

    nu!(
        output,
        cwd("tests/fixtures/formats"),
        "open utf16.ini | get .ShellClassInfo | get IconIndex | echo $it"
    );

    assert_eq!(output, expected);
}
#[test]
fn open_error_if_file_not_found() {
nu_error!(

BIN
tests/fixtures/formats/utf16.ini vendored Normal file

Binary file not shown.