chore: only check for db update if local db is old enough

Previously, grype would check for a database update on every invocation.
However, this was causing slow downs in the CDN. Since new databases are
typically made available every 24 hours, change grype to, by default,
only check if a new database is available if the local database is more
than 12 hours old.

Signed-off-by: Will Murphy <willmurphyscode@users.noreply.github.com>
This commit is contained in:
Will Murphy 2024-07-24 09:06:27 -04:00
parent e07546ec86
commit 7192206258
7 changed files with 140 additions and 14 deletions

View file

@ -30,6 +30,8 @@ func DBCheck(app clio.Application) *cobra.Command {
}
func runDBCheck(opts options.Database) error {
// `grype db check` should _always_ check for updates, regardless of config
opts.MinAgeToCheckForUpdate = 0
dbCurator, err := db.NewCurator(opts.ToCuratorConfig())
if err != nil {
return err

View file

@ -26,6 +26,8 @@ func DBUpdate(app clio.Application) *cobra.Command {
}
func runDBUpdate(opts options.Database) error {
// `grype db update` should _always_ check for updates, regardless of config
opts.MinAgeToCheckForUpdate = 0
dbCurator, err := db.NewCurator(opts.ToCuratorConfig())
if err != nil {
return err

View file

@ -18,6 +18,7 @@ type Database struct {
AutoUpdate bool `yaml:"auto-update" json:"auto-update" mapstructure:"auto-update"`
ValidateByHashOnStart bool `yaml:"validate-by-hash-on-start" json:"validate-by-hash-on-start" mapstructure:"validate-by-hash-on-start"`
ValidateAge bool `yaml:"validate-age" json:"validate-age" mapstructure:"validate-age"`
MinAgeToCheckForUpdate time.Duration `yaml:"min-age-to-check-for-update" json:"min-age-to-check-for-update" mapstructure:"min-age-to-check-for-update"`
MaxAllowedBuiltAge time.Duration `yaml:"max-allowed-built-age" json:"max-allowed-built-age" mapstructure:"max-allowed-built-age"`
UpdateAvailableTimeout time.Duration `yaml:"update-available-timeout" json:"update-available-timeout" mapstructure:"update-available-timeout"`
UpdateDownloadTimeout time.Duration `yaml:"update-download-timeout" json:"update-download-timeout" mapstructure:"update-download-timeout"`
@ -28,9 +29,10 @@ var _ interface {
} = (*Database)(nil)
const (
defaultMaxDBAge time.Duration = time.Hour * 24 * 5
defaultUpdateAvailableTimeout = time.Second * 30
defaultUpdateDownloadTimeout = time.Second * 120
defaultMinBuiltAgeToCheckForUpdate = time.Hour * 12
defaultMaxDBAge time.Duration = time.Hour * 24 * 5
defaultUpdateAvailableTimeout = time.Second * 30
defaultUpdateDownloadTimeout = time.Second * 120
)
func DefaultDatabase(id clio.Identification) Database {
@ -40,6 +42,7 @@ func DefaultDatabase(id clio.Identification) Database {
AutoUpdate: true,
ValidateAge: true,
// After this period (5 days) the db data is considered stale
MinAgeToCheckForUpdate: defaultMinBuiltAgeToCheckForUpdate,
MaxAllowedBuiltAge: defaultMaxDBAge,
UpdateAvailableTimeout: defaultUpdateAvailableTimeout,
UpdateDownloadTimeout: defaultUpdateDownloadTimeout,
@ -53,6 +56,7 @@ func (cfg Database) ToCuratorConfig() db.Config {
CACert: cfg.CACert,
ValidateByHashOnGet: cfg.ValidateByHashOnStart,
ValidateAge: cfg.ValidateAge,
MinBuiltAgeToCheck: cfg.MinAgeToCheckForUpdate,
MaxAllowedBuiltAge: cfg.MaxAllowedBuiltAge,
ListingFileTimeout: cfg.UpdateAvailableTimeout,
UpdateTimeout: cfg.UpdateDownloadTimeout,

6
go.mod
View file

@ -60,7 +60,10 @@ require (
gorm.io/gorm v1.25.11
)
require github.com/anchore/go-collections v0.0.0-20240216171411-9321230ce537
require (
github.com/anchore/go-collections v0.0.0-20240216171411-9321230ce537
github.com/magiconair/properties v1.8.7
)
require (
cloud.google.com/go v0.110.10 // indirect
@ -164,7 +167,6 @@ require (
github.com/kr/text v0.2.0 // indirect
github.com/logrusorgru/aurora v0.0.0-20200102142835-e9ef32dff381 // indirect
github.com/lucasb-eyer/go-colorful v1.2.0 // indirect
github.com/magiconair/properties v1.8.7 // indirect
github.com/maruel/natural v1.1.1 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect

View file

@ -36,6 +36,7 @@ type Config struct {
CACert string
ValidateByHashOnGet bool
ValidateAge bool
MinBuiltAgeToCheck time.Duration
MaxAllowedBuiltAge time.Duration
ListingFileTimeout time.Duration
UpdateTimeout time.Duration
@ -52,6 +53,7 @@ type Curator struct {
validateByHashOnGet bool
validateAge bool
maxAllowedBuiltAge time.Duration
minBuildAgeToCheck time.Duration
}
func NewCurator(cfg Config) (Curator, error) {
@ -81,6 +83,7 @@ func NewCurator(cfg Config) (Curator, error) {
validateByHashOnGet: cfg.ValidateByHashOnGet,
validateAge: cfg.ValidateAge,
maxAllowedBuiltAge: cfg.MaxAllowedBuiltAge,
minBuildAgeToCheck: cfg.MinBuiltAgeToCheck,
}, nil
}
@ -189,6 +192,20 @@ func (c *Curator) Update() (bool, error) {
func (c *Curator) IsUpdateAvailable() (bool, *Metadata, *ListingEntry, error) {
log.Debugf("checking for available database updates")
// compare created data to current db date
current, err := NewMetadataFromDir(c.fs, c.dbDir)
if err != nil {
return false, nil, nil, fmt.Errorf("current metadata corrupt: %w", err)
}
// There is very little chance a new DB is available so soon
// Do nothing to reduce traffic on CDN that serves listing file.
if current != nil && time.Since(current.Built) < c.minBuildAgeToCheck {
log.Debugf("skipping db update check because current db is less than %0.2f hours old", c.minBuildAgeToCheck.Hours())
log.Debugf("run 'grype db update' to force a check")
return false, nil, nil, nil
}
listing, err := c.ListingFromURL()
if err != nil {
return false, nil, nil, err
@ -200,12 +217,6 @@ func (c *Curator) IsUpdateAvailable() (bool, *Metadata, *ListingEntry, error) {
}
log.Debugf("found database update candidate: %s", updateEntry)
// compare created data to current db date
current, err := NewMetadataFromDir(c.fs, c.dbDir)
if err != nil {
return false, nil, nil, fmt.Errorf("current metadata corrupt: %w", err)
}
if current.IsSupersededBy(updateEntry) {
log.Debugf("database update available: %s", updateEntry)
return true, current, updateEntry, nil

View file

@ -2,6 +2,7 @@ package db
import (
"bufio"
"encoding/json"
"errors"
"fmt"
"io"
@ -62,10 +63,10 @@ func (g *testGetter) GetToDir(dst, src string, _ ...*progress.Manual) error {
return afero.WriteFile(g.fs, dst, []byte(g.dir[src]), 0755)
}
func newTestCurator(tb testing.TB, fs afero.Fs, getter file.Getter, dbDir, metadataUrl string, validateDbHash bool) Curator {
func newTestCurator(tb testing.TB, fs afero.Fs, getter file.Getter, dbDir, listingFileURL string, validateDbHash bool) Curator {
c, err := NewCurator(Config{
DBRootDir: dbDir,
ListingURL: metadataUrl,
ListingURL: listingFileURL,
ValidateByHashOnGet: validateDbHash,
})
@ -447,3 +448,107 @@ func TestCuratorTimeoutBehavior(t *testing.T) {
t.Fatalf("timeout exceeded (%v)", failAfter)
}
}
func TestCuratorIsUpdateAvailable(t *testing.T) {
testDir := "/tmp/dbDir"
listingURL := "http://example.com/metadata/listing.json"
tests := []struct {
name string
existingMetadata Metadata
listing Listing
forceListingCheck bool
wantListingCheck bool
wantUpdate bool
wantErr bool
}{
{
name: "skip update check; too soon",
existingMetadata: metadataBuiltAt(time.Now().Add(-10 * time.Minute)),
wantListingCheck: false,
wantUpdate: false,
wantErr: false,
},
{
name: "force causes check even if local metadata is very new",
existingMetadata: metadataBuiltAt(time.Now().Add(-10 * time.Minute)),
listing: listingWithEntryBuiltAt(time.Time{}, "http://example.com/payload.tar.gz"),
forceListingCheck: true,
wantListingCheck: true,
wantUpdate: false,
wantErr: false,
},
{
name: "check for update but not needed",
existingMetadata: metadataBuiltAt(time.Now().Add(-50 * time.Hour)), // old listing file
listing: listingWithEntryBuiltAt(time.Time{}, "http://example.com/payload.tar.gz"),
wantListingCheck: true,
wantUpdate: false,
wantErr: false,
},
{
name: "update needed",
wantListingCheck: true,
wantUpdate: true,
wantErr: false,
existingMetadata: metadataBuiltAt(time.Now().Add(-50 * time.Hour)), // old listing file
listing: listingWithEntryBuiltAt(time.Now().Add(-1*time.Hour), "http://example.com/payload.tar.gz"),
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
files := map[string]string{}
b, err := json.Marshal(tt.listing)
require.NoError(t, err)
files[listingURL] = string(b)
dirs := map[string]string{}
fs := afero.NewMemMapFs()
fs.MkdirAll(path.Join(testDir, "5"), 0755)
f, err := fs.Create(path.Join(testDir, "5", "metadata.json"))
require.NoError(t, err)
defer f.Close()
err = json.NewEncoder(f).Encode(tt.existingMetadata)
require.NoError(t, err)
getter := newTestGetter(fs, files, dirs)
cur := newTestCurator(t, fs, getter, testDir, listingURL, false)
cur.minBuildAgeToCheck = 12 * time.Hour
if tt.forceListingCheck {
cur.minBuildAgeToCheck = 0
}
gotUpdate, _, _, gotErr := cur.IsUpdateAvailable()
if tt.wantErr {
require.Error(t, gotErr)
return
}
if !tt.wantListingCheck {
assert.Empty(t, getter.calls)
} else {
assert.True(t, getter.calls.Contains(listingURL))
}
require.NoError(t, gotErr)
assert.Equal(t, tt.wantUpdate, gotUpdate)
})
}
}
func metadataBuiltAt(t time.Time) Metadata {
return Metadata{
Built: t,
Version: 5,
Checksum: "sha256:972d0f51e180d424f3fff2637a1559e06940f35eda6bd03593be2a3db8f28971",
}
}
func listingWithEntryBuiltAt(t time.Time, u string) Listing {
return Listing{Available: map[int][]ListingEntry{
5: {
ListingEntry{
Built: t,
URL: mustUrl(url.Parse(u)),
Version: 5,
Checksum: "sha256:0123456789abcdef",
},
},
}}
}

View file

@ -3,13 +3,13 @@ package match
import (
"testing"
"github.com/anchore/syft/syft/file"
"github.com/google/uuid"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/anchore/grype/grype/pkg"
"github.com/anchore/grype/grype/vulnerability"
"github.com/anchore/syft/syft/file"
syftPkg "github.com/anchore/syft/syft/pkg"
)