Merge pull request #2340 from deantvv/more-unicode

more: fix unicode bug
This commit is contained in:
Sylvestre Ledru 2021-06-04 19:51:32 +02:00 committed by GitHub
commit 9712ecb4d5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 50 additions and 16 deletions

2
Cargo.lock generated
View file

@ -2215,6 +2215,8 @@ dependencies = [
"nix 0.13.1", "nix 0.13.1",
"redox_syscall 0.1.57", "redox_syscall 0.1.57",
"redox_termios", "redox_termios",
"unicode-segmentation",
"unicode-width",
"uucore", "uucore",
"uucore_procs", "uucore_procs",
] ]

View file

@ -20,6 +20,8 @@ uucore = { version = ">=0.0.7", package = "uucore", path = "../../uucore" }
uucore_procs = { version = ">=0.0.5", package = "uucore_procs", path = "../../uucore_procs" } uucore_procs = { version = ">=0.0.5", package = "uucore_procs", path = "../../uucore_procs" }
crossterm = ">=0.19" crossterm = ">=0.19"
atty = "0.2.14" atty = "0.2.14"
unicode-width = "0.1.7"
unicode-segmentation = "1.7.1"
[target.'cfg(target_os = "redox")'.dependencies] [target.'cfg(target_os = "redox")'.dependencies]
redox_termios = "0.1" redox_termios = "0.1"

View file

@ -29,6 +29,9 @@ use crossterm::{
terminal, terminal,
}; };
use unicode_segmentation::UnicodeSegmentation;
use unicode_width::UnicodeWidthStr;
pub mod options { pub mod options {
pub const SILENT: &str = "silent"; pub const SILENT: &str = "silent";
pub const LOGICAL: &str = "logical"; pub const LOGICAL: &str = "logical";
@ -313,23 +316,30 @@ fn break_buff(buff: &str, cols: usize) -> Vec<String> {
lines lines
} }
fn break_line(mut line: &str, cols: usize) -> Vec<String> { fn break_line(line: &str, cols: usize) -> Vec<String> {
let breaks = (line.len() / cols).saturating_add(1); let width = UnicodeWidthStr::width(line);
let mut lines = Vec::with_capacity(breaks); let mut lines = Vec::new();
// TODO: Use unicode width instead of the length in bytes. if width < cols {
if line.len() < cols {
lines.push(line.to_string()); lines.push(line.to_string());
return lines; return lines;
} }
for _ in 1..=breaks { let gr_idx = UnicodeSegmentation::grapheme_indices(line, true);
let (line1, line2) = line.split_at(cols); let mut last_index = 0;
lines.push(line1.to_string()); let mut total_width = 0;
if line2.len() < cols { for (index, grapheme) in gr_idx {
lines.push(line2.to_string()); let width = UnicodeWidthStr::width(grapheme);
break; total_width += width;
if total_width > cols {
lines.push(line[last_index..index].to_string());
last_index = index;
total_width = width;
} }
line = line2; }
if last_index != line.len() {
lines.push(line[last_index..].to_string());
} }
lines lines
} }
@ -363,6 +373,7 @@ fn make_prompt_and_flush(stdout: &mut Stdout, lower_mark: u16, lc: u16) {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::{break_line, calc_range}; use super::{break_line, calc_range};
use unicode_width::UnicodeWidthStr;
// It is good to test the above functions // It is good to test the above functions
#[test] #[test]
@ -379,11 +390,12 @@ mod tests {
} }
let lines = break_line(&test_string, 80); let lines = break_line(&test_string, 80);
let widths: Vec<usize> = lines
.iter()
.map(|s| UnicodeWidthStr::width(&s[..]))
.collect();
assert_eq!( assert_eq!((80, 80, 40), (widths[0], widths[1], widths[2]));
(80, 80, 40),
(lines[0].len(), lines[1].len(), lines[2].len())
);
} }
#[test] #[test]
@ -397,4 +409,22 @@ mod tests {
assert_eq!(20, lines[0].len()); assert_eq!(20, lines[0].len());
} }
#[test]
fn test_break_line_zwj() {
let mut test_string = String::with_capacity(1100);
for _ in 0..20 {
test_string.push_str("👩🏻‍🔬");
}
let lines = break_line(&test_string, 80);
let widths: Vec<usize> = lines
.iter()
.map(|s| UnicodeWidthStr::width(&s[..]))
.collect();
// Each 👩🏻‍🔬 has a display width of 6, so the first line breaks at the largest multiple of 6 that fits in 80 columns (6 * 13 = 78); the remaining 7 graphemes give 42.
assert_eq!((78, 42), (widths[0], widths[1]));
}
} }