Move fzf matcher into helper

This commit is contained in:
Dave Davenport 2017-01-11 09:20:19 +01:00
parent 56c787690f
commit 4452b08288
3 changed files with 190 additions and 171 deletions

View file

@ -206,5 +206,19 @@ char * rofi_latin_to_utf8_strdup ( const char *input, gssize length );
* @returns the updated retv list.
*/
PangoAttrList *token_match_get_pango_attr ( ThemeHighlight th, GRegex **tokens, const char *input, PangoAttrList *retv );
/**
* @param pattern The user input to match against.
* @param plen Pattern length.
* @param str The input to match against pattern.
* @param slen Lenght of str.
*
* FZF like fuzzy sorting algorithm.
*
* @returns the sorting weight.
*/
int rofi_scorer_fuzzy_evaluate ( const char *pattern, glong plen, const char *str, glong slen );
/*@}*/
#endif // ROFI_HELPER_H

View file

@ -29,6 +29,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <unistd.h>
#include <errno.h>
#include <fcntl.h>
@ -720,3 +721,177 @@ char * rofi_force_utf8 ( gchar *start, ssize_t length )
return g_string_free ( string, FALSE );
}
/****
* FZF like scorer
*/
/** Max length of input to score. */
#define FUZZY_SCORER_MAX_LENGTH 256
/** minimum score */
#define MIN_SCORE ( INT_MIN / 2 )
/** Leading gap score */
#define LEADING_GAP_SCORE -4
/** gap score */
#define GAP_SCORE -5
/** start of word score */
#define WORD_START_SCORE 50
/** non-word score */
#define NON_WORD_SCORE 40
/** CamelCase score */
#define CAMEL_SCORE ( WORD_START_SCORE + GAP_SCORE - 1 )
/** Consecutive score */
#define CONSECUTIVE_SCORE ( WORD_START_SCORE + GAP_SCORE )
/** non-start multiplier */
#define PATTERN_NON_START_MULTIPLIER 1
/** start multiplier */
#define PATTERN_START_MULTIPLIER 2
/**
* Character classification.
*/
enum CharClass
{
/* Lower case */
LOWER,
/* Upper case */
UPPER,
/* Number */
DIGIT,
/* non word character */
NON_WORD
};
/**
* @param c The character to determine class of
*
* @returns the class of the character c.
*/
static enum CharClass rofi_scorer_get_character_class ( gunichar c )
{
if ( g_unichar_islower ( c ) ) {
return LOWER;
}
if ( g_unichar_isupper ( c ) ) {
return UPPER;
}
if ( g_unichar_isdigit ( c ) ) {
return DIGIT;
}
return NON_WORD;
}
/**
* @param prev The previous character.
* @param curr The current character
*
* Scrore the transition.
*
* @returns score of the transition.
*/
static int rofi_scorer_get_score_for ( enum CharClass prev, enum CharClass curr )
{
if ( prev == NON_WORD && curr != NON_WORD ) {
return WORD_START_SCORE;
}
if ( ( prev == LOWER && curr == UPPER ) ||
( prev != DIGIT && curr == DIGIT ) ) {
return CAMEL_SCORE;
}
if ( curr == NON_WORD ) {
return NON_WORD_SCORE;
}
return 0;
}
/**
* @param pattern The user input to match against.
* @param plen Pattern length.
* @param str The input to match against pattern.
* @param slen Lenght of str.
*
* rofi_scorer_fuzzy_evaluate implements a global sequence alignment algorithm to find the maximum accumulated score by
* aligning `pattern` to `str`. It applies when `pattern` is a subsequence of `str`.
*
* Scoring criteria
* - Prefer matches at the start of a word, or the start of subwords in CamelCase/camelCase/camel123 words. See WORD_START_SCORE/CAMEL_SCORE.
* - Non-word characters matter. See NON_WORD_SCORE.
* - The first characters of words of `pattern` receive bonus because they usually have more significance than the rest.
* See PATTERN_START_MULTIPLIER/PATTERN_NON_START_MULTIPLIER.
* - Superfluous characters in `str` will reduce the score (gap penalty). See GAP_SCORE.
* - Prefer early occurrence of the first character. See LEADING_GAP_SCORE/GAP_SCORE.
*
* The recurrence of the dynamic programming:
* dp[i][j]: maximum accumulated score by aligning pattern[0..i] to str[0..j]
* dp[0][j] = leading_gap_penalty(0, j) + score[j]
* dp[i][j] = max(dp[i-1][j-1] + CONSECUTIVE_SCORE, max(dp[i-1][k] + gap_penalty(k+1, j) + score[j] : k < j))
*
* The first dimension can be suppressed since we do not need a matching scheme, which reduces the space complexity from
* O(N*M) to O(M)
*
* @returns the sorting weight.
*/
int rofi_scorer_fuzzy_evaluate ( const char *pattern, glong plen, const char *str, glong slen )
{
if ( slen > FUZZY_SCORER_MAX_LENGTH ) {
return -MIN_SCORE;
}
if ( plen == 5 ) {
plen = plen;
}
glong pi, si;
// whether we are aligning the first character of pattern
gboolean pfirst = TRUE;
// whether the start of a word in pattern
gboolean pstart = TRUE;
// score for each position
int *score = g_malloc_n ( slen, sizeof ( int ) );
// dp[i]: maximum value by aligning pattern[0..pi] to str[0..si]
int *dp = g_malloc_n ( slen, sizeof ( int ) );
// uleft: value of the upper left cell; ulefts: maximum value of uleft and cells on the left. The arbitrary initial
// values suppress warnings.
int uleft = 0, ulefts = 0, left, lefts;
const gchar *pit = pattern, *sit;
enum CharClass prev = NON_WORD, cur;
for ( si = 0, sit = str; si < slen; si++, sit = g_utf8_next_char ( sit ) ) {
cur = rofi_scorer_get_character_class ( g_utf8_get_char ( sit ) );
score[si] = rofi_scorer_get_score_for ( prev, cur );
prev = cur;
dp[si] = MIN_SCORE;
}
for ( pi = 0; pi < plen; pi++, pit = g_utf8_next_char ( pit ) ) {
gunichar pc = g_utf8_get_char ( pit ), sc;
if ( g_unichar_isspace ( pc ) ) {
pstart = TRUE;
continue;
}
lefts = MIN_SCORE;
for ( si = 0, sit = str; si < slen; si++, sit = g_utf8_next_char ( sit ) ) {
left = dp[si];
lefts = MAX ( lefts + GAP_SCORE, left );
sc = g_utf8_get_char ( sit );
if ( config.case_sensitive
? pc == sc
: g_unichar_tolower ( pc ) == g_unichar_tolower ( sc ) ) {
int t = score[si] * ( pstart ? PATTERN_START_MULTIPLIER : PATTERN_NON_START_MULTIPLIER );
dp[si] = pfirst
? LEADING_GAP_SCORE * si + t
: MAX ( uleft + CONSECUTIVE_SCORE, ulefts + t );
}
else {
dp[si] = MIN_SCORE;
}
uleft = left;
ulefts = lefts;
}
pfirst = pstart = FALSE;
}
lefts = MIN_SCORE;
for ( si = 0; si < slen; si++ ) {
lefts = MAX ( lefts + GAP_SCORE, dp[si] );
}
g_free ( score );
g_free ( dp );
return -lefts;
}

View file

@ -25,7 +25,6 @@
*/
#include <config.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@ -64,26 +63,6 @@
/** The Rofi View log domain */
#define LOG_DOMAIN "View"
/** Max length of input to score. */
#define FUZZY_SCORER_MAX_LENGTH 256
/** minimum score */
#define MIN_SCORE ( INT_MIN / 2 )
/** Leading gap score */
#define LEADING_GAP_SCORE -4
/** gap score */
#define GAP_SCORE -5
/** start of word score */
#define WORD_START_SCORE 50
/** non-word score */
#define NON_WORD_SCORE 40
/** CamelCase score */
#define CAMEL_SCORE ( WORD_START_SCORE + GAP_SCORE - 1 )
/** Consecutive score */
#define CONSECUTIVE_SCORE ( WORD_START_SCORE + GAP_SCORE )
/** non-start multiplier */
#define PATTERN_NON_START_MULTIPLIER 1
/** start multiplier */
#define PATTERN_START_MULTIPLIER 2
#include "xcb.h"
/**
@ -562,154 +541,6 @@ static void rofi_view_call_thread ( gpointer data, gpointer user_data )
g_mutex_unlock ( t->mutex );
}
/**
* Character classification.
*/
enum CharClass
{
/* Lower case */
LOWER,
/* Upper case */
UPPER,
/* Number */
DIGIT,
/* non word character */
NON_WORD
};
/**
* @param c The character to determine class of
*
* @returns the class of the character c.
*/
static enum CharClass rofi_scorer_get_character_class ( gunichar c )
{
if ( g_unichar_islower ( c ) ) {
return LOWER;
}
if ( g_unichar_isupper ( c ) ) {
return UPPER;
}
if ( g_unichar_isdigit ( c ) ) {
return DIGIT;
}
return NON_WORD;
}
/**
* @param prev The previous character.
* @param curr The current character
*
* Scrore the transition.
*
* @returns score of the transition.
*/
static int rofi_scorer_get_score_for ( enum CharClass prev, enum CharClass curr )
{
if ( prev == NON_WORD && curr != NON_WORD ) {
return WORD_START_SCORE;
}
if ( ( prev == LOWER && curr == UPPER ) ||
( prev != DIGIT && curr == DIGIT ) ) {
return CAMEL_SCORE;
}
if ( curr == NON_WORD ) {
return NON_WORD_SCORE;
}
return 0;
}
/**
* @param pattern The user input to match against.
* @param plen Pattern length.
* @param str The input to match against pattern.
* @param slen Lenght of str.
*
* rofi_scorer_fuzzy_evaluate implements a global sequence alignment algorithm to find the maximum accumulated score by
* aligning `pattern` to `str`. It applies when `pattern` is a subsequence of `str`.
*
* Scoring criteria
* - Prefer matches at the start of a word, or the start of subwords in CamelCase/camelCase/camel123 words. See WORD_START_SCORE/CAMEL_SCORE.
* - Non-word characters matter. See NON_WORD_SCORE.
* - The first characters of words of `pattern` receive bonus because they usually have more significance than the rest.
* See PATTERN_START_MULTIPLIER/PATTERN_NON_START_MULTIPLIER.
* - Superfluous characters in `str` will reduce the score (gap penalty). See GAP_SCORE.
* - Prefer early occurrence of the first character. See LEADING_GAP_SCORE/GAP_SCORE.
*
* The recurrence of the dynamic programming:
* dp[i][j]: maximum accumulated score by aligning pattern[0..i] to str[0..j]
* dp[0][j] = leading_gap_penalty(0, j) + score[j]
* dp[i][j] = max(dp[i-1][j-1] + CONSECUTIVE_SCORE, max(dp[i-1][k] + gap_penalty(k+1, j) + score[j] : k < j))
*
* The first dimension can be suppressed since we do not need a matching scheme, which reduces the space complexity from
* O(N*M) to O(M)
*
* @returns the sorting weight.
*/
static int rofi_scorer_fuzzy_evaluate ( const char *pattern, glong plen, const char *str, glong slen )
{
if ( slen > FUZZY_SCORER_MAX_LENGTH ) {
return MIN_SCORE;
}
if ( plen == 5 ) {
plen = plen;
}
glong pi, si;
// whether we are aligning the first character of pattern
gboolean pfirst = TRUE;
// whether the start of a word in pattern
gboolean pstart = TRUE;
// score for each position
int *score = g_malloc_n ( slen, sizeof ( int ) );
// dp[i]: maximum value by aligning pattern[0..pi] to str[0..si]
int *dp = g_malloc_n ( slen, sizeof ( int ) );
// uleft: value of the upper left cell; ulefts: maximum value of uleft and cells on the left. The arbitrary initial
// values suppress warnings.
int uleft = 0, ulefts = 0, left, lefts;
const gchar *pit = pattern, *sit;
enum CharClass prev = NON_WORD, cur;
for ( si = 0, sit = str; si < slen; si++, sit = g_utf8_next_char ( sit ) ) {
cur = rofi_scorer_get_character_class ( g_utf8_get_char ( sit ) );
score[si] = rofi_scorer_get_score_for ( prev, cur );
prev = cur;
dp[si] = MIN_SCORE;
}
for ( pi = 0; pi < plen; pi++, pit = g_utf8_next_char ( pit ) ) {
gunichar pc = g_utf8_get_char ( pit ), sc;
if ( g_unichar_isspace ( pc ) ) {
pstart = TRUE;
continue;
}
lefts = MIN_SCORE;
for ( si = 0, sit = str; si < slen; si++, sit = g_utf8_next_char ( sit ) ) {
left = dp[si];
lefts = MAX ( lefts + GAP_SCORE, left );
sc = g_utf8_get_char ( sit );
if ( config.case_sensitive
? pc == sc
: g_unichar_tolower ( pc ) == g_unichar_tolower ( sc ) ) {
int t = score[si] * ( pstart ? PATTERN_START_MULTIPLIER : PATTERN_NON_START_MULTIPLIER );
dp[si] = pfirst
? LEADING_GAP_SCORE * si + t
: MAX ( uleft + CONSECUTIVE_SCORE, ulefts + t );
}
else {
dp[si] = MIN_SCORE;
}
uleft = left;
ulefts = lefts;
}
pfirst = pstart = FALSE;
}
lefts = MIN_SCORE;
for ( si = 0; si < slen; si++ ) {
lefts = MAX ( lefts + GAP_SCORE, dp[si] );
}
g_free ( score );
g_free ( dp );
return lefts;
}
static void filter_elements ( thread_state *t, G_GNUC_UNUSED gpointer user_data )
{
char *pattern = NULL;
@ -726,7 +557,7 @@ static void filter_elements ( thread_state *t, G_GNUC_UNUSED gpointer user_data
if ( config.matching_method == MM_FUZZY ) {
char *str = mode_get_completion ( t->state->sw, i );
glong slen = g_utf8_strlen ( str, -1 );
t->state->distance[i] = -rofi_scorer_fuzzy_evaluate ( pattern, plen, str, slen );
t->state->distance[i] = rofi_scorer_fuzzy_evaluate ( pattern, plen, str, slen );
g_free ( str );
}
else if ( config.levenshtein_sort ) {
@ -742,7 +573,6 @@ static void filter_elements ( thread_state *t, G_GNUC_UNUSED gpointer user_data
g_free ( pattern );
}
}
static void rofi_view_setup_fake_transparency ( const char const *fake_background )
{
if ( CacheState.fake_bg == NULL ) {