add unicode-segmentation example (#517)

Thanks!
2024-11-22 03:23:05 +00:00 · 2019-04-12 13:05:36 -03:00 · 2019-04-12 13:05:36 -03:00 · f1ad9ad44c
commit f1ad9ad44c
parent 5824ee21eb
6 changed files with 34 additions and 2 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -8,6 +8,7 @@ publish = false
 build = "build.rs"

 [dependencies]
+ansi_term = "0.11.0"
 base64 = "0.9"
 bitflags = "1.0"
 byteorder = "1.0"
@@ -27,6 +28,7 @@ log = "0.4"
 log4rs = "0.8"
 memmap = "0.7"
 mime = "0.3"
+nalgebra = "0.16.12"
 ndarray = "0.12"
 num = "0.2"
 num_cpus = "1.8"
@@ -48,10 +50,9 @@ tar = "0.4.12"
 tempdir = "0.3.5"
 threadpool = "1.6"
 toml = "0.4"
+unicode-segmentation = "1.2.1"
 url = "1.6"
 walkdir = "2.0"
-ansi_term = "0.11.0"
-nalgebra = "0.16.12"

 [target.'cfg(target_os = "linux")'.dependencies]
 syslog = "4.0"
--- a/ci/dictionary.txt
+++ b/ci/dictionary.txt
@@ -112,6 +112,8 @@ GitHub
 github
 GlobError
 Guybrush
+graphemes
+Graphemes
 GzDecoder
 GzEncoder
 Hackerman
@@ -311,6 +313,7 @@ Tuple
 typesafe
 unary
 unix
+unicode
 unwinded
 UpperHex
 uptime
--- a/src/links.md
+++ b/src/links.md
@@ -138,5 +138,7 @@ Keep lines sorted.
 [toml]: https://docs.rs/toml/
 [url-badge]: https://badge-cache.kominick.com/crates/v/url.svg?label=url
 [url]: https://docs.rs/url/
+[unicode-segmentation-badge]: https://badge-cache.kominick.com/crates/v/unicode-segmentation.svg?label=unicode-segmentation
+[unicode-segmentation]: https://docs.rs/unicode-segmentation/
 [walkdir-badge]: https://badge-cache.kominick.com/crates/v/walkdir.svg?label=walkdir
 [walkdir]: https://docs.rs/walkdir/
--- a/src/text.md
+++ b/src/text.md
@@ -2,6 +2,7 @@

 | Recipe | Crates | Categories |
 |--------|--------|------------|
+| [Collect Unicode Graphemes][ex-unicode-graphemes] | [![unicode-segmentation-badge]][unicode-segmentation] | [![cat-text-processing-badge]][cat-text-processing] |
 | [Verify and extract login from an email address][ex-verify-extract-email] | [![regex-badge]][regex] [![lazy_static-badge]][lazy_static] | [![cat-text-processing-badge]][cat-text-processing] |
 | [Extract a list of unique #Hashtags from a text][ex-extract-hashtags] | [![regex-badge]][regex] [![lazy_static-badge]][lazy_static] | [![cat-text-processing-badge]][cat-text-processing] |
 | [Extract phone numbers from text][ex-phone] | [![regex-badge]][regex] | [![cat-text-processing-badge]][cat-text-processing] |
@@ -15,6 +16,7 @@
 [ex-regex-filter-log]: text/regex.html#filter-a-log-file-by-matching-multiple-regular-expressions
 [ex-regex-replace-named]: text/regex.html#replace-all-occurrences-of-one-text-pattern-with-another-pattern

+[ex-unicode-graphemes]: text/string_parsing.html#collect-unicode-graphemes
 [string_parsing-from_str]: text/string_parsing.html#implement-the-fromstr-trait-for-a-custom-struct

 {{#include links.md}}
--- a/src/text/string_parsing.md
+++ b/src/text/string_parsing.md
@@ -1,5 +1,7 @@
 # String Parsing

+{{#include string_parsing/graphemes.md}}
+
 {{#include string_parsing/from_str.md}}

 {{#include ../links.md}}
--- a/src/text/string_parsing/graphemes.md
+++ b/src/text/string_parsing/graphemes.md
@@ -0,0 +1,22 @@
+## Collect Unicode Graphemes
+
+[![unicode-segmentation-badge]][`unicode-segmentation`] [![cat-text-processing-badge]][cat-text-processing]
+
+Collect individual Unicode graphemes from a UTF-8 string using the
+[`UnicodeSegmentation::graphemes`] function from the [`unicode-segmentation`] crate.
+
+```rust
+extern crate unicode_segmentation;
+use unicode_segmentation::UnicodeSegmentation;
+
+fn main() {
+    let name = "José Guimarães\r\n";
+    // `true` requests extended grapheme clusters, so "é" is a single item.
+    let graphemes = UnicodeSegmentation::graphemes(name, true)
+        .collect::<Vec<&str>>();
+    assert_eq!(graphemes[3], "é");
+}
+```
+
+[`UnicodeSegmentation::graphemes`]: https://docs.rs/unicode-segmentation/*/unicode_segmentation/trait.UnicodeSegmentation.html#tymethod.graphemes
+[`unicode-segmentation`]: https://docs.rs/unicode-segmentation/1.2.1/unicode_segmentation/