mirror of
https://github.com/uutils/coreutils
synced 2024-11-15 01:17:09 +00:00
Merge pull request #2340 from deantvv/more-unicode
more: fix unicode bug
This commit is contained in:
commit
9712ecb4d5
3 changed files with 50 additions and 16 deletions
2
Cargo.lock
generated
2
Cargo.lock
generated
|
@ -2215,6 +2215,8 @@ dependencies = [
|
||||||
"nix 0.13.1",
|
"nix 0.13.1",
|
||||||
"redox_syscall 0.1.57",
|
"redox_syscall 0.1.57",
|
||||||
"redox_termios",
|
"redox_termios",
|
||||||
|
"unicode-segmentation",
|
||||||
|
"unicode-width",
|
||||||
"uucore",
|
"uucore",
|
||||||
"uucore_procs",
|
"uucore_procs",
|
||||||
]
|
]
|
||||||
|
|
|
@ -20,6 +20,8 @@ uucore = { version = ">=0.0.7", package = "uucore", path = "../../uucore" }
|
||||||
uucore_procs = { version = ">=0.0.5", package = "uucore_procs", path = "../../uucore_procs" }
|
uucore_procs = { version = ">=0.0.5", package = "uucore_procs", path = "../../uucore_procs" }
|
||||||
crossterm = ">=0.19"
|
crossterm = ">=0.19"
|
||||||
atty = "0.2.14"
|
atty = "0.2.14"
|
||||||
|
unicode-width = "0.1.7"
|
||||||
|
unicode-segmentation = "1.7.1"
|
||||||
|
|
||||||
[target.'cfg(target_os = "redox")'.dependencies]
|
[target.'cfg(target_os = "redox")'.dependencies]
|
||||||
redox_termios = "0.1"
|
redox_termios = "0.1"
|
||||||
|
|
|
@ -29,6 +29,9 @@ use crossterm::{
|
||||||
terminal,
|
terminal,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
use unicode_segmentation::UnicodeSegmentation;
|
||||||
|
use unicode_width::UnicodeWidthStr;
|
||||||
|
|
||||||
pub mod options {
|
pub mod options {
|
||||||
pub const SILENT: &str = "silent";
|
pub const SILENT: &str = "silent";
|
||||||
pub const LOGICAL: &str = "logical";
|
pub const LOGICAL: &str = "logical";
|
||||||
|
@ -313,23 +316,30 @@ fn break_buff(buff: &str, cols: usize) -> Vec<String> {
|
||||||
lines
|
lines
|
||||||
}
|
}
|
||||||
|
|
||||||
fn break_line(mut line: &str, cols: usize) -> Vec<String> {
|
fn break_line(line: &str, cols: usize) -> Vec<String> {
|
||||||
let breaks = (line.len() / cols).saturating_add(1);
|
let width = UnicodeWidthStr::width(line);
|
||||||
let mut lines = Vec::with_capacity(breaks);
|
let mut lines = Vec::new();
|
||||||
// TODO: Use unicode width instead of the length in bytes.
|
if width < cols {
|
||||||
if line.len() < cols {
|
|
||||||
lines.push(line.to_string());
|
lines.push(line.to_string());
|
||||||
return lines;
|
return lines;
|
||||||
}
|
}
|
||||||
|
|
||||||
for _ in 1..=breaks {
|
let gr_idx = UnicodeSegmentation::grapheme_indices(line, true);
|
||||||
let (line1, line2) = line.split_at(cols);
|
let mut last_index = 0;
|
||||||
lines.push(line1.to_string());
|
let mut total_width = 0;
|
||||||
if line2.len() < cols {
|
for (index, grapheme) in gr_idx {
|
||||||
lines.push(line2.to_string());
|
let width = UnicodeWidthStr::width(grapheme);
|
||||||
break;
|
total_width += width;
|
||||||
|
|
||||||
|
if total_width > cols {
|
||||||
|
lines.push(line[last_index..index].to_string());
|
||||||
|
last_index = index;
|
||||||
|
total_width = width;
|
||||||
}
|
}
|
||||||
line = line2;
|
}
|
||||||
|
|
||||||
|
if last_index != line.len() {
|
||||||
|
lines.push(line[last_index..].to_string());
|
||||||
}
|
}
|
||||||
lines
|
lines
|
||||||
}
|
}
|
||||||
|
@ -363,6 +373,7 @@ fn make_prompt_and_flush(stdout: &mut Stdout, lower_mark: u16, lc: u16) {
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::{break_line, calc_range};
|
use super::{break_line, calc_range};
|
||||||
|
use unicode_width::UnicodeWidthStr;
|
||||||
|
|
||||||
// It is good to test the above functions
|
// It is good to test the above functions
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -379,11 +390,12 @@ mod tests {
|
||||||
}
|
}
|
||||||
|
|
||||||
let lines = break_line(&test_string, 80);
|
let lines = break_line(&test_string, 80);
|
||||||
|
let widths: Vec<usize> = lines
|
||||||
|
.iter()
|
||||||
|
.map(|s| UnicodeWidthStr::width(&s[..]))
|
||||||
|
.collect();
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!((80, 80, 40), (widths[0], widths[1], widths[2]));
|
||||||
(80, 80, 40),
|
|
||||||
(lines[0].len(), lines[1].len(), lines[2].len())
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -397,4 +409,22 @@ mod tests {
|
||||||
|
|
||||||
assert_eq!(20, lines[0].len());
|
assert_eq!(20, lines[0].len());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_break_line_zwj() {
|
||||||
|
let mut test_string = String::with_capacity(1100);
|
||||||
|
for _ in 0..20 {
|
||||||
|
test_string.push_str("👩🏻🔬");
|
||||||
|
}
|
||||||
|
|
||||||
|
let lines = break_line(&test_string, 80);
|
||||||
|
|
||||||
|
let widths: Vec<usize> = lines
|
||||||
|
.iter()
|
||||||
|
.map(|s| UnicodeWidthStr::width(&s[..]))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// Each 👩🏻🔬 is 6 columns wide, so the line breaks at the largest multiple of 6 that fits in 80 columns: 6 * 13 = 78
|
||||||
|
assert_eq!((78, 42), (widths[0], widths[1]));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue