mirror of
https://github.com/superseriousbusiness/gotosocial
synced 2024-11-26 22:30:25 +00:00
[performance] cache v2 filter keyword regular expressions (#2903)
* add caching of filterkeyword regular expressions * formatting * fix WholeWord nil check
This commit is contained in:
parent
6c0d93c6cb
commit
b092da6d28
5 changed files with 85 additions and 36 deletions
5
internal/cache/db.go
vendored
5
internal/cache/db.go
vendored
|
@ -531,6 +531,11 @@ func (c *Caches) initFilterKeyword() {
|
||||||
// See internal/db/bundb/filter.go.
|
// See internal/db/bundb/filter.go.
|
||||||
filterKeyword2.Filter = nil
|
filterKeyword2.Filter = nil
|
||||||
|
|
||||||
|
// We specifically DO NOT unset
|
||||||
|
// the regexp field here, as any
|
||||||
|
// regexp.Regexp instance is safe
|
||||||
|
// for concurrent access.
|
||||||
|
|
||||||
return filterKeyword2
|
return filterKeyword2
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -25,6 +25,7 @@ import (
|
||||||
"github.com/superseriousbusiness/gotosocial/internal/gtscontext"
|
"github.com/superseriousbusiness/gotosocial/internal/gtscontext"
|
||||||
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
|
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
|
||||||
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
|
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
|
||||||
|
"github.com/superseriousbusiness/gotosocial/internal/log"
|
||||||
"github.com/superseriousbusiness/gotosocial/internal/util"
|
"github.com/superseriousbusiness/gotosocial/internal/util"
|
||||||
"github.com/uptrace/bun"
|
"github.com/uptrace/bun"
|
||||||
)
|
)
|
||||||
|
@ -34,12 +35,22 @@ func (f *filterDB) GetFilterKeywordByID(ctx context.Context, id string) (*gtsmod
|
||||||
"ID",
|
"ID",
|
||||||
func() (*gtsmodel.FilterKeyword, error) {
|
func() (*gtsmodel.FilterKeyword, error) {
|
||||||
var filterKeyword gtsmodel.FilterKeyword
|
var filterKeyword gtsmodel.FilterKeyword
|
||||||
err := f.db.
|
|
||||||
|
// Scan from DB.
|
||||||
|
if err := f.db.
|
||||||
NewSelect().
|
NewSelect().
|
||||||
Model(&filterKeyword).
|
Model(&filterKeyword).
|
||||||
Where("? = ?", bun.Ident("id"), id).
|
Where("? = ?", bun.Ident("id"), id).
|
||||||
Scan(ctx)
|
Scan(ctx); err != nil {
|
||||||
return &filterKeyword, err
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pre-compile filter keyword regular expression.
|
||||||
|
if err := filterKeyword.Compile(); err != nil {
|
||||||
|
return nil, gtserror.Newf("error compiling filter keyword regex: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return &filterKeyword, nil
|
||||||
},
|
},
|
||||||
id,
|
id,
|
||||||
)
|
)
|
||||||
|
@ -57,20 +68,20 @@ func (f *filterDB) GetFilterKeywordByID(ctx context.Context, id string) (*gtsmod
|
||||||
return filterKeyword, nil
|
return filterKeyword, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (f *filterDB) populateFilterKeyword(ctx context.Context, filterKeyword *gtsmodel.FilterKeyword) error {
|
func (f *filterDB) populateFilterKeyword(ctx context.Context, filterKeyword *gtsmodel.FilterKeyword) (err error) {
|
||||||
if filterKeyword.Filter == nil {
|
if filterKeyword.Filter == nil {
|
||||||
// Filter is not set, fetch from the cache or database.
|
// Filter is not set, fetch from the cache or database.
|
||||||
filter, err := f.state.DB.GetFilterByID(
|
filterKeyword.Filter, err = f.state.DB.GetFilterByID(
|
||||||
// Don't populate the filter with all of its keywords and statuses or we'll just end up back here.
|
|
||||||
|
// Don't populate the filter with all of its keywords
|
||||||
|
// and statuses or we'll just end up back here.
|
||||||
gtscontext.SetBarebones(ctx),
|
gtscontext.SetBarebones(ctx),
|
||||||
filterKeyword.FilterID,
|
filterKeyword.FilterID,
|
||||||
)
|
)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
filterKeyword.Filter = filter
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -84,6 +95,7 @@ func (f *filterDB) GetFilterKeywordsForAccountID(ctx context.Context, accountID
|
||||||
|
|
||||||
func (f *filterDB) getFilterKeywords(ctx context.Context, idColumn string, id string) ([]*gtsmodel.FilterKeyword, error) {
|
func (f *filterDB) getFilterKeywords(ctx context.Context, idColumn string, id string) ([]*gtsmodel.FilterKeyword, error) {
|
||||||
var filterKeywordIDs []string
|
var filterKeywordIDs []string
|
||||||
|
|
||||||
if err := f.db.
|
if err := f.db.
|
||||||
NewSelect().
|
NewSelect().
|
||||||
Model((*gtsmodel.FilterKeyword)(nil)).
|
Model((*gtsmodel.FilterKeyword)(nil)).
|
||||||
|
@ -92,6 +104,7 @@ func (f *filterDB) getFilterKeywords(ctx context.Context, idColumn string, id st
|
||||||
Scan(ctx, &filterKeywordIDs); err != nil {
|
Scan(ctx, &filterKeywordIDs); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(filterKeywordIDs) == 0 {
|
if len(filterKeywordIDs) == 0 {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
@ -101,6 +114,8 @@ func (f *filterDB) getFilterKeywords(ctx context.Context, idColumn string, id st
|
||||||
filterKeywordIDs,
|
filterKeywordIDs,
|
||||||
func(uncachedFilterKeywordIDs []string) ([]*gtsmodel.FilterKeyword, error) {
|
func(uncachedFilterKeywordIDs []string) ([]*gtsmodel.FilterKeyword, error) {
|
||||||
uncachedFilterKeywords := make([]*gtsmodel.FilterKeyword, 0, len(uncachedFilterKeywordIDs))
|
uncachedFilterKeywords := make([]*gtsmodel.FilterKeyword, 0, len(uncachedFilterKeywordIDs))
|
||||||
|
|
||||||
|
// Scan from DB.
|
||||||
if err := f.db.
|
if err := f.db.
|
||||||
NewSelect().
|
NewSelect().
|
||||||
Model(&uncachedFilterKeywords).
|
Model(&uncachedFilterKeywords).
|
||||||
|
@ -108,6 +123,16 @@ func (f *filterDB) getFilterKeywords(ctx context.Context, idColumn string, id st
|
||||||
Scan(ctx); err != nil {
|
Scan(ctx); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Compile all the keyword regular expressions.
|
||||||
|
uncachedFilterKeywords = slices.DeleteFunc(uncachedFilterKeywords, func(filterKeyword *gtsmodel.FilterKeyword) bool {
|
||||||
|
if err := filterKeyword.Compile(); err != nil {
|
||||||
|
log.Errorf(ctx, "error compiling filter keyword regex: %v", err)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
})
|
||||||
|
|
||||||
return uncachedFilterKeywords, nil
|
return uncachedFilterKeywords, nil
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
@ -125,23 +150,26 @@ func (f *filterDB) getFilterKeywords(ctx context.Context, idColumn string, id st
|
||||||
}
|
}
|
||||||
|
|
||||||
// Populate the filter keywords. Remove any that we can't populate from the return slice.
|
// Populate the filter keywords. Remove any that we can't populate from the return slice.
|
||||||
errs := gtserror.NewMultiError(len(filterKeywords))
|
|
||||||
filterKeywords = slices.DeleteFunc(filterKeywords, func(filterKeyword *gtsmodel.FilterKeyword) bool {
|
filterKeywords = slices.DeleteFunc(filterKeywords, func(filterKeyword *gtsmodel.FilterKeyword) bool {
|
||||||
if err := f.populateFilterKeyword(ctx, filterKeyword); err != nil {
|
if err := f.populateFilterKeyword(ctx, filterKeyword); err != nil {
|
||||||
errs.Appendf(
|
log.Errorf(ctx, "error populating filter keyword: %v", err)
|
||||||
"error populating filter keyword %s: %w",
|
|
||||||
filterKeyword.ID,
|
|
||||||
err,
|
|
||||||
)
|
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
})
|
})
|
||||||
|
|
||||||
return filterKeywords, errs.Combine()
|
return filterKeywords, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (f *filterDB) PutFilterKeyword(ctx context.Context, filterKeyword *gtsmodel.FilterKeyword) error {
|
func (f *filterDB) PutFilterKeyword(ctx context.Context, filterKeyword *gtsmodel.FilterKeyword) error {
|
||||||
|
if filterKeyword.Regexp == nil {
|
||||||
|
// Ensure regexp is compiled
|
||||||
|
// before attempted caching.
|
||||||
|
err := filterKeyword.Compile()
|
||||||
|
if err != nil {
|
||||||
|
return gtserror.Newf("error compiling filter keyword regex: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
return f.state.Caches.GTS.FilterKeyword.Store(filterKeyword, func() error {
|
return f.state.Caches.GTS.FilterKeyword.Store(filterKeyword, func() error {
|
||||||
_, err := f.db.
|
_, err := f.db.
|
||||||
NewInsert().
|
NewInsert().
|
||||||
|
@ -156,7 +184,14 @@ func (f *filterDB) UpdateFilterKeyword(ctx context.Context, filterKeyword *gtsmo
|
||||||
if len(columns) > 0 {
|
if len(columns) > 0 {
|
||||||
columns = append(columns, "updated_at")
|
columns = append(columns, "updated_at")
|
||||||
}
|
}
|
||||||
|
if filterKeyword.Regexp == nil {
|
||||||
|
// Ensure regexp is compiled
|
||||||
|
// before attempted caching.
|
||||||
|
err := filterKeyword.Compile()
|
||||||
|
if err != nil {
|
||||||
|
return gtserror.Newf("error compiling filter keyword regex: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
return f.state.Caches.GTS.FilterKeyword.Store(filterKeyword, func() error {
|
return f.state.Caches.GTS.FilterKeyword.Store(filterKeyword, func() error {
|
||||||
_, err := f.db.
|
_, err := f.db.
|
||||||
NewUpdate().
|
NewUpdate().
|
||||||
|
|
|
@ -17,7 +17,10 @@
|
||||||
|
|
||||||
package gtsmodel
|
package gtsmodel
|
||||||
|
|
||||||
import "time"
|
import (
|
||||||
|
"regexp"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
// Filter stores a filter created by a local account.
|
// Filter stores a filter created by a local account.
|
||||||
type Filter struct {
|
type Filter struct {
|
||||||
|
@ -39,14 +42,28 @@ type Filter struct {
|
||||||
|
|
||||||
// FilterKeyword stores a single keyword to filter statuses against.
|
// FilterKeyword stores a single keyword to filter statuses against.
|
||||||
type FilterKeyword struct {
|
type FilterKeyword struct {
|
||||||
ID string `bun:"type:CHAR(26),pk,nullzero,notnull,unique"` // id of this item in the database
|
ID string `bun:"type:CHAR(26),pk,nullzero,notnull,unique"` // id of this item in the database
|
||||||
CreatedAt time.Time `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"` // when was item created
|
CreatedAt time.Time `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"` // when was item created
|
||||||
UpdatedAt time.Time `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"` // when was item last updated
|
UpdatedAt time.Time `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"` // when was item last updated
|
||||||
AccountID string `bun:"type:CHAR(26),notnull,nullzero"` // ID of the local account that created the filter keyword.
|
AccountID string `bun:"type:CHAR(26),notnull,nullzero"` // ID of the local account that created the filter keyword.
|
||||||
FilterID string `bun:"type:CHAR(26),notnull,nullzero,unique:filter_keywords_filter_id_keyword_uniq"` // ID of the filter that this keyword belongs to.
|
FilterID string `bun:"type:CHAR(26),notnull,nullzero,unique:filter_keywords_filter_id_keyword_uniq"` // ID of the filter that this keyword belongs to.
|
||||||
Filter *Filter `bun:"-"` // Filter corresponding to FilterID
|
Filter *Filter `bun:"-"` // Filter corresponding to FilterID
|
||||||
Keyword string `bun:",nullzero,notnull,unique:filter_keywords_filter_id_keyword_uniq"` // The keyword or phrase to filter against.
|
Keyword string `bun:",nullzero,notnull,unique:filter_keywords_filter_id_keyword_uniq"` // The keyword or phrase to filter against.
|
||||||
WholeWord *bool `bun:",nullzero,notnull,default:false"` // Should the filter consider word boundaries?
|
WholeWord *bool `bun:",nullzero,notnull,default:false"` // Should the filter consider word boundaries?
|
||||||
|
Regexp *regexp.Regexp `bun:"-"` // pre-prepared regular expression
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compile will compile this FilterKeyword as a prepared regular expression.
|
||||||
|
func (k *FilterKeyword) Compile() (err error) {
|
||||||
|
var wordBreak string
|
||||||
|
if k.WholeWord != nil && *k.WholeWord {
|
||||||
|
wordBreak = `\b`
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compile keyword filter regexp.
|
||||||
|
quoted := regexp.QuoteMeta(k.Keyword)
|
||||||
|
k.Regexp, err = regexp.Compile(`(?i)` + wordBreak + quoted + wordBreak)
|
||||||
|
return // caller is expected to wrap this error
|
||||||
}
|
}
|
||||||
|
|
||||||
// FilterStatus stores a single status to filter.
|
// FilterStatus stores a single status to filter.
|
||||||
|
|
|
@ -22,7 +22,6 @@ import (
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"math"
|
"math"
|
||||||
"regexp"
|
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
@ -746,18 +745,9 @@ func (c *Converter) statusToAPIFilterResults(
|
||||||
keywordMatches := make([]string, 0, len(filter.Keywords))
|
keywordMatches := make([]string, 0, len(filter.Keywords))
|
||||||
fields := filterableTextFields(s)
|
fields := filterableTextFields(s)
|
||||||
for _, filterKeyword := range filter.Keywords {
|
for _, filterKeyword := range filter.Keywords {
|
||||||
wholeWord := util.PtrValueOr(filterKeyword.WholeWord, false)
|
|
||||||
wordBreak := ``
|
|
||||||
if wholeWord {
|
|
||||||
wordBreak = `\b`
|
|
||||||
}
|
|
||||||
re, err := regexp.Compile(`(?i)` + wordBreak + regexp.QuoteMeta(filterKeyword.Keyword) + wordBreak)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
var isMatch bool
|
var isMatch bool
|
||||||
for _, field := range fields {
|
for _, field := range fields {
|
||||||
if re.MatchString(field) {
|
if filterKeyword.Regexp.MatchString(field) {
|
||||||
isMatch = true
|
isMatch = true
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
|
@ -546,6 +546,7 @@ func (suite *InternalToFrontendTestSuite) TestWarnFilteredStatusToFrontend() {
|
||||||
requestingAccount := suite.testAccounts["local_account_1"]
|
requestingAccount := suite.testAccounts["local_account_1"]
|
||||||
expectedMatchingFilter := suite.testFilters["local_account_1_filter_1"]
|
expectedMatchingFilter := suite.testFilters["local_account_1_filter_1"]
|
||||||
expectedMatchingFilterKeyword := suite.testFilterKeywords["local_account_1_filter_1_keyword_1"]
|
expectedMatchingFilterKeyword := suite.testFilterKeywords["local_account_1_filter_1_keyword_1"]
|
||||||
|
suite.NoError(expectedMatchingFilterKeyword.Compile())
|
||||||
expectedMatchingFilterKeyword.Filter = expectedMatchingFilter
|
expectedMatchingFilterKeyword.Filter = expectedMatchingFilter
|
||||||
expectedMatchingFilter.Keywords = []*gtsmodel.FilterKeyword{expectedMatchingFilterKeyword}
|
expectedMatchingFilter.Keywords = []*gtsmodel.FilterKeyword{expectedMatchingFilterKeyword}
|
||||||
requestingAccountFilters := []*gtsmodel.Filter{expectedMatchingFilter}
|
requestingAccountFilters := []*gtsmodel.Filter{expectedMatchingFilter}
|
||||||
|
@ -700,6 +701,7 @@ func (suite *InternalToFrontendTestSuite) TestHideFilteredStatusToFrontend() {
|
||||||
expectedMatchingFilter := suite.testFilters["local_account_1_filter_1"]
|
expectedMatchingFilter := suite.testFilters["local_account_1_filter_1"]
|
||||||
expectedMatchingFilter.Action = gtsmodel.FilterActionHide
|
expectedMatchingFilter.Action = gtsmodel.FilterActionHide
|
||||||
expectedMatchingFilterKeyword := suite.testFilterKeywords["local_account_1_filter_1_keyword_1"]
|
expectedMatchingFilterKeyword := suite.testFilterKeywords["local_account_1_filter_1_keyword_1"]
|
||||||
|
suite.NoError(expectedMatchingFilterKeyword.Compile())
|
||||||
expectedMatchingFilterKeyword.Filter = expectedMatchingFilter
|
expectedMatchingFilterKeyword.Filter = expectedMatchingFilter
|
||||||
expectedMatchingFilter.Keywords = []*gtsmodel.FilterKeyword{expectedMatchingFilterKeyword}
|
expectedMatchingFilter.Keywords = []*gtsmodel.FilterKeyword{expectedMatchingFilterKeyword}
|
||||||
requestingAccountFilters := []*gtsmodel.Filter{expectedMatchingFilter}
|
requestingAccountFilters := []*gtsmodel.Filter{expectedMatchingFilter}
|
||||||
|
|
Loading…
Reference in a new issue