[performance] Database optimizations (#419)

* create first index on notifications

* tidy up + add tests

* log queries for trace, ops for debug

* index commonly used fields

* rearrange query

* add a few more indexes

* remove schema-breaking index
(add this back in later)

* re-add cleanup query index
This commit is contained in:
tobi 2022-03-07 11:33:18 +01:00 committed by GitHub
parent 07727753b9
commit 8de928b5e9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 427 additions and 8 deletions

View file

@ -137,8 +137,8 @@ func NewBunDBService(ctx context.Context) (db.DB, error) {
} }
// add a hook to just log queries and the time they take // add a hook to just log queries and the time they take
// only do this for trace logging where performance isn't 1st concern // only do this for logging where performance isn't 1st concern
if logrus.GetLevel() >= logrus.TraceLevel { if logrus.GetLevel() >= logrus.DebugLevel {
conn.DB.AddQueryHook(newDebugQueryHook()) conn.DB.AddQueryHook(newDebugQueryHook())
} }

View file

@ -0,0 +1,291 @@
/*
GoToSocial
Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package migrations
import (
"context"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/uptrace/bun"
)
func init() {
up := func(ctx context.Context, db *bun.DB) error {
return db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error {
// ACCOUNTS are often selected by URI, username, or domain
if _, err := tx.
NewCreateIndex().
Model(&gtsmodel.Account{}).
Index("accounts_uri_idx").
Column("uri").
Exec(ctx); err != nil {
return err
}
if _, err := tx.
NewCreateIndex().
Model(&gtsmodel.Account{}).
Index("accounts_username_idx").
Column("username").
Exec(ctx); err != nil {
return err
}
if _, err := tx.
NewCreateIndex().
Model(&gtsmodel.Account{}).
Index("accounts_domain_idx").
Column("domain").
Exec(ctx); err != nil {
return err
}
if _, err := tx.
NewCreateIndex().
Model(&gtsmodel.Account{}).
Index("accounts_username_domain_idx"). // for selecting local accounts by username
Column("username", "domain").
Exec(ctx); err != nil {
return err
}
// NOTIFICATIONS are commonly selected by their target_account_id
if _, err := tx.
NewCreateIndex().
Model(&gtsmodel.Notification{}).
Index("notifications_target_account_id_idx").
Column("target_account_id").
Exec(ctx); err != nil {
return err
}
// STATUSES are selected in many different ways, so we need quite few indexes
// to avoid queries becoming painfully slow as more statuses get stored
if _, err := tx.
NewCreateIndex().
Model(&gtsmodel.Status{}).
Index("statuses_uri_idx").
Column("uri").
Exec(ctx); err != nil {
return err
}
if _, err := tx.
NewCreateIndex().
Model(&gtsmodel.Status{}).
Index("statuses_account_id_idx").
Column("account_id").
Exec(ctx); err != nil {
return err
}
if _, err := tx.
NewCreateIndex().
Model(&gtsmodel.Status{}).
Index("statuses_in_reply_to_account_id_idx").
Column("in_reply_to_account_id").
Exec(ctx); err != nil {
return err
}
if _, err := tx.
NewCreateIndex().
Model(&gtsmodel.Status{}).
Index("statuses_boost_of_account_id_idx").
Column("boost_of_account_id").
Exec(ctx); err != nil {
return err
}
if _, err := tx.
NewCreateIndex().
Model(&gtsmodel.Status{}).
Index("statuses_in_reply_to_id_idx").
Column("in_reply_to_id").
Exec(ctx); err != nil {
return err
}
if _, err := tx.
NewCreateIndex().
Model(&gtsmodel.Status{}).
Index("statuses_boost_of_id_idx").
Column("boost_of_id").
Exec(ctx); err != nil {
return err
}
if _, err := tx.
NewCreateIndex().
Model(&gtsmodel.Status{}).
Index("statuses_visibility_idx").
Column("visibility").
Exec(ctx); err != nil {
return err
}
if _, err := tx.
NewCreateIndex().
Model(&gtsmodel.Status{}).
Index("statuses_local_idx").
Column("local").
Exec(ctx); err != nil {
return err
}
// DOMAIN BLOCKS are almost always selected by their domain
if _, err := tx.
NewCreateIndex().
Model(&gtsmodel.DomainBlock{}).
Index("domain_blocks_domain_idx").
Column("domain").
Exec(ctx); err != nil {
return err
}
// INSTANCES are usually selected by their domain
if _, err := tx.
NewCreateIndex().
Model(&gtsmodel.Instance{}).
Index("instances_domain_idx").
Column("domain").
Exec(ctx); err != nil {
return err
}
// STATUS FAVES are almost always selected by their target status
if _, err := tx.
NewCreateIndex().
Model(&gtsmodel.StatusFave{}).
Index("status_faves_status_id_idx").
Column("status_id").
Exec(ctx); err != nil {
return err
}
// MENTIONS are almost always selected by their originating status
if _, err := tx.
NewCreateIndex().
Model(&gtsmodel.Mention{}).
Index("mentions_status_id_idx").
Column("status_id").
Exec(ctx); err != nil {
return err
}
// FOLLOW_REQUESTS and FOLLOWS are usually selected by who they originate from, and who they target
if _, err := tx.
NewCreateIndex().
Model(&gtsmodel.FollowRequest{}).
Index("follow_requests_account_id_idx").
Column("account_id").
Exec(ctx); err != nil {
return err
}
if _, err := tx.
NewCreateIndex().
Model(&gtsmodel.FollowRequest{}).
Index("follow_requests_target_account_id_idx").
Column("target_account_id").
Exec(ctx); err != nil {
return err
}
if _, err := tx.
NewCreateIndex().
Model(&gtsmodel.Follow{}).
Index("follows_account_id_idx").
Column("account_id").
Exec(ctx); err != nil {
return err
}
if _, err := tx.
NewCreateIndex().
Model(&gtsmodel.Follow{}).
Index("follows_target_account_id_idx").
Column("target_account_id").
Exec(ctx); err != nil {
return err
}
// BLOCKS are usually selected simultaneously by who they originate from and who they target
if _, err := tx.
NewCreateIndex().
Model(&gtsmodel.Block{}).
Index("blocks_account_id_target_account_id_idx").
Column("account_id", "target_account_id").
Exec(ctx); err != nil {
return err
}
// MEDIA ATTACHMENTS are often selected by status ID, the account that owns the attachment
if _, err := tx.
NewCreateIndex().
Model(&gtsmodel.MediaAttachment{}).
Index("media_attachments_status_id_idx").
Column("status_id").
Exec(ctx); err != nil {
return err
}
if _, err := tx.
NewCreateIndex().
Model(&gtsmodel.MediaAttachment{}).
Index("media_attachments_account_id_idx").
Column("account_id").
Exec(ctx); err != nil {
return err
}
// for media cleanup jobs, attachments will be selected on a bunch of fields so make an index of this...
if _, err := tx.
NewCreateIndex().
Model(&gtsmodel.MediaAttachment{}).
Index("media_attachments_cleanup_idx").
Column("cached", "avatar", "header", "created_at", "remote_url").
Exec(ctx); err != nil {
return err
}
// TOKENS are usually selected via Access field for user-level tokens
if _, err := tx.
NewCreateIndex().
Model(&gtsmodel.Token{}).
Index("tokens_access_idx").
Column("access").
Exec(ctx); err != nil {
return err
}
return nil
})
}
down := func(ctx context.Context, db *bun.DB) error {
return db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error {
return nil
})
}
if err := Migrations.Register(up, down); err != nil {
panic(err)
}
}

View file

@ -66,9 +66,7 @@ func (n *notificationDB) GetNotifications(ctx context.Context, accountID string,
q := n.conn. q := n.conn.
NewSelect(). NewSelect().
Model(&notifications). Model(&notifications).
Column("id"). Column("id")
Where("target_account_id = ?", accountID).
Order("id DESC")
if maxID != "" { if maxID != "" {
q = q.Where("id < ?", maxID) q = q.Where("id < ?", maxID)
@ -78,6 +76,10 @@ func (n *notificationDB) GetNotifications(ctx context.Context, accountID string,
q = q.Where("id > ?", sinceID) q = q.Where("id > ?", sinceID)
} }
q = q.
Where("target_account_id = ?", accountID).
Order("id DESC")
if limit != 0 { if limit != 0 {
q = q.Limit(limit) q = q.Limit(limit)
} }
@ -120,8 +122,7 @@ func (n *notificationDB) putNotificationCache(notif *gtsmodel.Notification) {
} }
func (n *notificationDB) getNotificationDB(ctx context.Context, id string, dst *gtsmodel.Notification) error { func (n *notificationDB) getNotificationDB(ctx context.Context, id string, dst *gtsmodel.Notification) error {
q := n.newNotificationQ(dst). q := n.newNotificationQ(dst).WherePK()
Where("notification.id = ?", id)
if err := q.Scan(ctx); err != nil { if err := q.Scan(ctx); err != nil {
return n.conn.ProcessError(err) return n.conn.ProcessError(err)

View file

@ -0,0 +1,123 @@
/*
GoToSocial
Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package bundb_test
import (
"context"
"fmt"
"testing"
"time"
"github.com/stretchr/testify/suite"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/id"
)
func (suite *NotificationTestSuite) spamNotifs() {
// spam a shit ton of notifs into the database
// half of them will be for zork, the other half
// will be for random accounts
notifCount := 10000
zork := suite.testAccounts["local_account_1"]
for i := 0; i < notifCount; i++ {
notifID, err := id.NewULID()
if err != nil {
panic(err)
}
var targetAccountID string
if i%2 == 0 {
targetAccountID = zork.ID
} else {
randomAssID, err := id.NewRandomULID()
if err != nil {
panic(err)
}
targetAccountID = randomAssID
}
statusID, err := id.NewRandomULID()
if err != nil {
panic(err)
}
originAccountID, err := id.NewRandomULID()
if err != nil {
panic(err)
}
notif := &gtsmodel.Notification{
ID: notifID,
NotificationType: gtsmodel.NotificationFave,
CreatedAt: time.Now(),
TargetAccountID: targetAccountID,
OriginAccountID: originAccountID,
StatusID: statusID,
Read: false,
}
if err := suite.db.Put(context.Background(), notif); err != nil {
panic(err)
}
}
fmt.Printf("\n\n\nput %d notifs in the db\n\n\n", notifCount)
}
type NotificationTestSuite struct {
BunDBStandardTestSuite
}
func (suite *NotificationTestSuite) TestGetNotificationsWithSpam() {
suite.spamNotifs()
testAccount := suite.testAccounts["local_account_1"]
before := time.Now()
notifications, err := suite.db.GetNotifications(context.Background(), testAccount.ID, 20, "ZZZZZZZZZZZZZZZZZZZZZZZZZZ", "00000000000000000000000000")
suite.NoError(err)
timeTaken := time.Since(before)
fmt.Printf("\n\n\n withSpam: got %d notifications in %s\n\n\n", len(notifications), timeTaken)
suite.NotNil(notifications)
for _, n := range notifications {
suite.Equal(testAccount.ID, n.TargetAccountID)
}
}
func (suite *NotificationTestSuite) TestGetNotificationsWithoutSpam() {
testAccount := suite.testAccounts["local_account_1"]
before := time.Now()
notifications, err := suite.db.GetNotifications(context.Background(), testAccount.ID, 20, "ZZZZZZZZZZZZZZZZZZZZZZZZZZ", "00000000000000000000000000")
suite.NoError(err)
timeTaken := time.Since(before)
fmt.Printf("\n\n\n withoutSpam: got %d notifications in %s\n\n\n", len(notifications), timeTaken)
suite.NotNil(notifications)
for _, n := range notifications {
suite.Equal(testAccount.ID, n.TargetAccountID)
suite.NotNil(n.OriginAccount)
suite.NotNil(n.TargetAccount)
suite.NotNil(n.Status)
}
}
func TestNotificationTestSuite(t *testing.T) {
suite.Run(t, new(NotificationTestSuite))
}

View file

@ -47,5 +47,9 @@ func (q *debugQueryHook) AfterQuery(_ context.Context, event *bun.QueryEvent) {
"operation": event.Operation(), "operation": event.Operation(),
}) })
l.Tracef("[%s] %s", dur, event.Operation()) if logrus.GetLevel() == logrus.TraceLevel {
l.Tracef("[%s] %s", dur, event.Query)
} else {
l.Debugf("[%s] %s", dur, event.Operation())
}
} }