add unicode-segmentation example (#517)

Thanks!
This commit is contained in:
Jonas Galvez 2019-04-12 13:05:36 -03:00 committed by Andrew Gauger
parent 5824ee21eb
commit f1ad9ad44c
6 changed files with 34 additions and 2 deletions

View file

@ -8,6 +8,7 @@ publish = false
build = "build.rs"
[dependencies]
ansi_term = "0.11.0"
base64 = "0.9"
bitflags = "1.0"
byteorder = "1.0"
@ -27,6 +28,7 @@ log = "0.4"
log4rs = "0.8"
memmap = "0.7"
mime = "0.3"
nalgebra = "0.16.12"
ndarray = "0.12"
num = "0.2"
num_cpus = "1.8"
@ -48,10 +50,9 @@ tar = "0.4.12"
tempdir = "0.3.5"
threadpool = "1.6"
toml = "0.4"
unicode-segmentation = "1.2.1"
url = "1.6"
walkdir = "2.0"
ansi_term = "0.11.0"
nalgebra = "0.16.12"
[target.'cfg(target_os = "linux")'.dependencies]
syslog = "4.0"

View file

@ -112,6 +112,8 @@ GitHub
github
GlobError
Guybrush
graphemes
Graphemes
GzDecoder
GzEncoder
Hackerman
@ -311,6 +313,7 @@ Tuple
typesafe
unary
unix
unicode
unwinded
UpperHex
uptime

View file

@ -138,5 +138,7 @@ Keep lines sorted.
[toml]: https://docs.rs/toml/
[url-badge]: https://badge-cache.kominick.com/crates/v/url.svg?label=url
[url]: https://docs.rs/url/
[unicode-segmentation-badge]: https://badge-cache.kominick.com/crates/v/unicode-segmentation.svg?label=unicode-segmentation
[unicode-segmentation]: https://docs.rs/unicode-segmentation/
[walkdir-badge]: https://badge-cache.kominick.com/crates/v/walkdir.svg?label=walkdir
[walkdir]: https://docs.rs/walkdir/

View file

@ -2,6 +2,7 @@
| Recipe | Crates | Categories |
|--------|--------|------------|
| [Collect Unicode Graphemes][ex-unicode-graphemes] | [![unicode-segmentation-badge]][unicode-segmentation] | [![cat-text-processing-badge]][cat-text-processing] |
| [Verify and extract login from an email address][ex-verify-extract-email] | [![regex-badge]][regex] [![lazy_static-badge]][lazy_static] | [![cat-text-processing-badge]][cat-text-processing] |
| [Extract a list of unique #Hashtags from a text][ex-extract-hashtags] | [![regex-badge]][regex] [![lazy_static-badge]][lazy_static] | [![cat-text-processing-badge]][cat-text-processing] |
| [Extract phone numbers from text][ex-phone] | [![regex-badge]][regex] | [![cat-text-processing-badge]][cat-text-processing] |
@ -15,6 +16,7 @@
[ex-regex-filter-log]: text/regex.html#filter-a-log-file-by-matching-multiple-regular-expressions
[ex-regex-replace-named]: text/regex.html#replace-all-occurrences-of-one-text-pattern-with-another-pattern
[ex-unicode-graphemes]: text/string_parsing.html#collect-unicode-graphemes
[string_parsing-from_str]: text/string_parsing.html#implement-the-fromstr-trait-for-a-custom-struct
{{#include links.md}}

View file

@ -1,5 +1,7 @@
# String Parsing
{{#include string_parsing/graphemes.md}}
{{#include string_parsing/from_str.md}}
{{#include ../links.md}}

View file

@ -0,0 +1,22 @@
## Collect Unicode Graphemes
[![unicode-segmentation-badge]][`unicode-segmentation`] [![cat-text-processing-badge]][cat-text-processing]
Collect individual Unicode graphemes from a UTF-8 string using the
[`UnicodeSegmentation::graphemes`] function from the [`unicode-segmentation`] crate.
```rust
extern crate unicode_segmentation;
use unicode_segmentation::UnicodeSegmentation;

fn main() {
    let name = "José Guimarães\r\n";
    // `true` requests extended grapheme clusters rather than legacy ones.
    let graphemes = UnicodeSegmentation::graphemes(name, true)
        .collect::<Vec<&str>>();
    // Index 3 is the fourth grapheme of the string: the accented "é".
    assert_eq!(graphemes[3], "é");
}
```
[`UnicodeSegmentation::graphemes`]: https://docs.rs/unicode-segmentation/*/unicode_segmentation/trait.UnicodeSegmentation.html#tymethod.graphemes
[`unicode-segmentation`]: https://docs.rs/unicode-segmentation/1.2.1/unicode_segmentation/