mirror of
https://github.com/rust-lang-nursery/rust-cookbook
synced 2024-11-24 20:43:07 +00:00
Add Extract unique Hashtag example. fixes #242
This commit is contained in:
parent
2df7875008
commit
f161a8bcab
2 changed files with 44 additions and 1 deletions
|
@ -14,6 +14,8 @@
|
|||
| [Maintain global mutable state][ex-global-mut-state] | [![lazy_static-badge]][lazy_static] | [![cat-rust-patterns-badge]][cat-rust-patterns] |
|
||||
| [Access a file randomly using a memory map][ex-random-file-access] | [![memmap-badge]][memmap] | [![cat-filesystem-badge]][cat-filesystem] |
|
||||
| [Define and operate on a type represented as a bitfield][ex-bitflags] | [![bitflags-badge]][bitflags] | [![cat-no-std-badge]][cat-no-std] |
|
||||
| [Extract a list of unique #Hashtags from a text][ex-extract-hashtags] | [![regex-badge]][regex] [![lazy_static-badge]][lazy_static] | [![cat-text-processing-badge]][cat-text-processing] |
|
||||
|
||||
|
||||
|
||||
[ex-std-read-lines]: #ex-std-read-lines
|
||||
|
@ -527,6 +529,43 @@ fn main() {
|
|||
}
|
||||
```
|
||||
|
||||
[ex-extract-hashtags]: #ex-extract-hashtags
|
||||
<a name="ex-extract-hashtags"></a>
|
||||
## Extract a list of unique #Hashtags from a text
|
||||
|
||||
[![regex-badge]][regex] [![lazy_static-badge]][lazy_static] [![cat-text-processing-badge]][cat-text-processing]
|
||||
|
||||
Extracts a deduplicated set of hashtags from a text. The result is a `HashSet`, so the hashtags are returned in no particular order.
|
||||
|
||||
The hashtag regex given here only catches Latin hashtags that start with a letter. The complete [twitter hashtag regex] is much more complicated.
|
||||
|
||||
```rust
|
||||
extern crate regex;
|
||||
#[macro_use] extern crate lazy_static;
|
||||
|
||||
use regex::Regex;
|
||||
use std::collections::HashSet;
|
||||
|
||||
/// Note: A HashSet does not contain duplicate values.
|
||||
fn extract_hashtags(text: &str) -> HashSet<&str> {
|
||||
lazy_static! {
|
||||
static ref HASHTAG_REGEX : Regex = Regex::new(
|
||||
r"\#[a-zA-Z][0-9a-zA-Z_]*"
|
||||
).unwrap();
|
||||
}
|
||||
HASHTAG_REGEX.find_iter(text).map(|mat| mat.as_str()).collect()
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let tweet = "Hey #world, I just got my new #dog, say hello to Till. #dog #forever #2 #_ ";
|
||||
let tags = extract_hashtags(tweet);
|
||||
assert!(tags.contains("#dog") && tags.contains("#forever") && tags.contains("#world"));
|
||||
assert_eq!(tags.len(), 3);
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
|
||||
<!-- Categories -->
|
||||
|
||||
[cat-no-std-badge]: https://badge-cache.kominick.com/badge/no_std--x.svg?style=social
|
||||
|
@ -594,4 +633,5 @@ fn main() {
|
|||
<!-- Reference -->
|
||||
|
||||
[race-condition-file]: https://en.wikipedia.org/wiki/Race_condition#File_systems
|
||||
[raw string literals]: https://doc.rust-lang.org/reference/tokens.html#raw-string-literals
|
||||
[raw string literals]: https://doc.rust-lang.org/reference/tokens.html#raw-string-literals
|
||||
[twitter hashtag regex]: https://github.com/twitter/twitter-text/blob/master/java/src/com/twitter/Regex.java#L255
|
||||
|
|
|
@ -32,6 +32,8 @@ community. It needs and welcomes help. For details see
|
|||
| [Maintain global mutable state][ex-global-mut-state] | [![lazy_static-badge]][lazy_static] | [![cat-rust-patterns-badge]][cat-rust-patterns] |
|
||||
| [Access a file randomly using a memory map][ex-random-file-access] | [![memmap-badge]][memmap] | [![cat-filesystem-badge]][cat-filesystem] |
|
||||
| [Define and operate on a type represented as a bitfield][ex-bitflags] | [![bitflags-badge]][bitflags] | [![cat-no-std-badge]][cat-no-std] |
|
||||
| [Extract a list of unique #Hashtags from a text][ex-extract-hashtags] | [![regex-badge]][regex] [![lazy_static-badge]][lazy_static] | [![cat-text-processing-badge]][cat-text-processing] |
|
||||
|
||||
|
||||
## [Encoding](encoding.html)
|
||||
|
||||
|
@ -233,6 +235,7 @@ Keep lines sorted.
|
|||
[ex-threadpool-fractal]: concurrency.html#ex-threadpool-fractal
|
||||
[ex-dedup-filenames]: app.html#ex-dedup-filenames
|
||||
[ex-extract-links-webpage]: net.html#ex-extract-links-webpage
|
||||
[ex-extract-hashtags]: basics.html#ex-extract-hashtags
|
||||
[ex-file-post]: net.html#ex-file-post
|
||||
[ex-file-predicate]: app.html#ex-file-predicate
|
||||
[ex-file-skip-dot]: app.html#ex-file-skip-dot
|
||||
|
|
Loading…
Reference in a new issue