From 536bd2f6af6e2dc890a064d0e50de4e6deea109c Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Fri, 21 Jun 2024 17:34:52 -0400 Subject: [PATCH] denormalize affected fields Signed-off-by: Alex Goodman --- grype/db/v6/example/fixture-1.go | 16 +- grype/db/v6/example/main.go | 2 +- grype/db/v6/models.go | 103 +++++++---- grype/db/v6/vulnerability.go | 298 +++++++++++++++---------------- 4 files changed, 227 insertions(+), 192 deletions(-) diff --git a/grype/db/v6/example/fixture-1.go b/grype/db/v6/example/fixture-1.go index d7c65910..32bf2a02 100644 --- a/grype/db/v6/example/fixture-1.go +++ b/grype/db/v6/example/fixture-1.go @@ -56,8 +56,8 @@ func populateFixture1(store v6.Store) error { Affected: &[]v6.Affected{ { Package: &v6.Package{ - Ecosystem: "python", - PackageName: "example-python-package1", + Ecosystem: "python", + Name: "example-python-package1", Purls: &[]v6.Purl{ { Scheme: "pkg", @@ -92,8 +92,8 @@ func populateFixture1(store v6.Store) error { Affected: &[]v6.Affected{ { Package: &v6.Package{ - Ecosystem: "golang", - PackageName: "example-golang-package1", + Ecosystem: "golang", + Name: "example-golang-package1", Purls: &[]v6.Purl{ { Scheme: "pkg", @@ -133,8 +133,8 @@ func populateFixture1(store v6.Store) error { Affected: &[]v6.Affected{ { Package: &v6.Package{ - Ecosystem: "python", - PackageName: "example-python-package2", + Ecosystem: "python", + Name: "example-python-package2", Purls: &[]v6.Purl{ { Scheme: "pkg", @@ -177,8 +177,8 @@ func populateFixture1(store v6.Store) error { Affected: &[]v6.Affected{ { Package: &v6.Package{ - Ecosystem: "python", - PackageName: fmt.Sprintf("example-python-package%d", i+3), + Ecosystem: "python", + Name: fmt.Sprintf("example-python-package%d", i+3), Purls: &[]v6.Purl{ { Scheme: "pkg", diff --git a/grype/db/v6/example/main.go b/grype/db/v6/example/main.go index 8500ffde..2516bc1e 100644 --- a/grype/db/v6/example/main.go +++ b/grype/db/v6/example/main.go @@ -188,7 +188,7 @@ func (v affectedPackageStringer) String() string { if v.Package == nil { return "Package=" } - return fmt.Sprintf("Package[%d]{Ecosystem=%q, PackageName=%q, %v}", v.ID, v.Ecosystem, v.PackageName, operatingSystemStringer{v.OperatingSystem}) + return fmt.Sprintf("Package[%d]{Ecosystem=%q, Name=%q, %v}", v.ID, v.Ecosystem, v.Name, operatingSystemStringer{v.OperatingSystem}) } type operatingSystemStringer struct { diff --git a/grype/db/v6/models.go b/grype/db/v6/models.go index 0982ee01..2e1806c4 100644 --- a/grype/db/v6/models.go +++ b/grype/db/v6/models.go @@ -1,6 +1,7 @@ package v6 import ( + "fmt" "gorm.io/datatypes" "gorm.io/gorm" "time" @@ -85,6 +86,46 @@ type Vulnerability struct { // Affected is a list of affected entries related to this vulnerability Affected *[]Affected `gorm:"foreignKey:VulnerabilityID"` + + affected *[]Affected `gorm:"-"` +} + +func (c *Vulnerability) BeforeCreate(tx *gorm.DB) error { + // if the len of Affected is > 500, then create those in batches and then attach those to the Vulnerability + if c.Affected != nil && len(*c.Affected) > 500 { + c.affected = c.Affected + c.Affected = nil + } + + return nil +} + +func (c *Vulnerability) AfterCreate(tx *gorm.DB) error { + if c.affected == nil { + return nil + } + + // create in batches... + + var affecteds []*Affected + affs := *c.affected + for i := range affs { + a := affs[i] + a.VulnerabilityID = c.ID + affecteds = append(affecteds, &a) + } + + if err := tx.CreateInBatches(affecteds, 500).Error; err != nil { + return fmt.Errorf("failed to create affecteds in batches: %w", err) + } + affs = make([]Affected, len(affecteds)) + for i := range affecteds { + affs[i] = *affecteds[i] + } + c.Affected = &affs + c.affected = nil + + return nil } // TODO: can I do this? @@ -192,10 +233,10 @@ type Affected struct { Severities *datatypes.JSONSlice[AffectedSeverity] `gorm:"column:severities"` PackageQualifier *datatypes.JSON `gorm:"column:package_qualifier"` - Range *[]Range `gorm:"foreignKey:AffectedID"` - Packages *[]Package `gorm:"many2many:affected_packages"` - Digests *[]Digest `gorm:"many2many:affected_digests"` - Cpes *[]Cpe `gorm:"many2many:affected_cpes"` + Range *[]Range `gorm:"foreignKey:AffectedID"` + Package *Package `gorm:"embedded;embeddedPrefix:package_"` + Digest *Digest `gorm:"embedded;embeddedPrefix:digest_"` + Cpes *datatypes.JSONSlice[Cpe] `gorm:"column:cpes"` } // TODO: add later and reuse existing similar tables with many2many @@ -291,8 +332,6 @@ func (re *RangeEvent) BeforeCreate(tx *gorm.DB) (err error) { type Cpe struct { // TODO: what about different CPE versions? - ID int64 `gorm:"column:id;primaryKey"` - Schema string `gorm:"column:schema;not null;index:idx_cpe"` // effectively the CPE version Type string `gorm:"column:type;not null;index:idx_cpe"` Vendor *string `gorm:"column:vendor;index:idx_cpe"` @@ -304,33 +343,29 @@ type Cpe struct { // TODO: should we also have the remaining CPE fields here? } -func (c *Cpe) BeforeCreate(tx *gorm.DB) (err error) { - // if the name, major version, and minor version already exist in the table then we should not insert a new record - var existing Cpe - result := tx.Where("schema = ? AND type = ? AND vendor = ? AND product = ? AND version = ? AND version_update = ? AND target_software = ?", c.Schema, c.Type, c.Vendor, c.Product, c.Version, c.Update, c.TargetSoftware).First(&existing) - if result.Error == nil { - // if the record already exists, then we should use the existing record - *c = existing - } - return nil -} +//func (c *Cpe) BeforeCreate(tx *gorm.DB) (err error) { +// // if the name, major version, and minor version already exist in the table then we should not insert a new record +// var existing Cpe +// result := tx.Where("schema = ? AND type = ? AND vendor = ? AND product = ? AND version = ? AND version_update = ? AND target_software = ?", c.Schema, c.Type, c.Vendor, c.Product, c.Version, c.Update, c.TargetSoftware).First(&existing) +// if result.Error == nil { +// // if the record already exists, then we should use the existing record +// *c = existing +// } +// return nil +//} // Digest represents arbitrary digests that can be associated with a vulnerability such that if found the material can be considered to be affected by this vulnerability type Digest struct { - ID int64 `gorm:"column:id;primaryKey"` - - Algorithm string `gorm:"column:algorithm;not null"` - Value string `gorm:"column:value;not null"` + Algorithm string `gorm:"column:algorithm"` + Value string `gorm:"column:value"` } type Package struct { // TODO: setup unique indexes only for writing and drop before shipping for the best size tradeoff - ID int64 `gorm:"column:id;primaryKey"` - // TODO: break purl out into fields here - Ecosystem *string `gorm:"column:ecosystem;index:idx_package,unique"` // TODO: NVD doesn't have this, should this be nullable? - PackageName string `gorm:"column:package_name;index:idx_package,unique"` + Ecosystem *string `gorm:"column:ecosystem;index:idx_package"` // TODO: NVD doesn't have this, should this be nullable? + Name string `gorm:"column:name;index:idx_package"` //OperatingSystemID *int64 `gorm:"column:operating_system_id"` //OperatingSystem *OperatingSystem `gorm:"foreignKey:OperatingSystemID"` @@ -340,16 +375,16 @@ type Package struct { } -func (c *Package) BeforeCreate(tx *gorm.DB) (err error) { - // if the name, major version, and minor version already exist in the table then we should not insert a new record - var existing Package - result := tx.Where("package_name = ? AND ecosystem = ?", c.PackageName, c.Ecosystem).First(&existing) - if result.Error == nil { - // if the record already exists, then we should use the existing record - *c = existing - } - return nil -} +//func (c *Package) BeforeCreate(tx *gorm.DB) (err error) { +// // if the name, major version, and minor version already exist in the table then we should not insert a new record +// var existing Package +// result := tx.Where("name = ? AND ecosystem = ?", c.Name, c.Ecosystem).First(&existing) +// if result.Error == nil { +// // if the record already exists, then we should use the existing record +// *c = existing +// } +// return nil +//} //type Purl struct { // ID int64 `gorm:"column:id;primaryKey"` diff --git a/grype/db/v6/vulnerability.go b/grype/db/v6/vulnerability.go index 2c6e3710..f8fa1bca 100644 --- a/grype/db/v6/vulnerability.go +++ b/grype/db/v6/vulnerability.go @@ -44,8 +44,8 @@ func (s *vulnerabilityStore) AddVulnerabilities(vulnerabilities ...*Vulnerabilit for _, h := range []func([]*Vulnerability) error{ s.handleOSs, s.handleRangeEvents, - s.handleCPEs, - s.handlePackages, + //s.handleCPEs, + //s.handlePackages, } { if err := h(vulnerabilities); err != nil { return err @@ -56,154 +56,154 @@ func (s *vulnerabilityStore) AddVulnerabilities(vulnerabilities ...*Vulnerabilit return s.db.CreateInBatches(vulnerabilities, s.BatchSize).Error } -func (s *vulnerabilityStore) handlePackages(vulns []*Vulnerability) error { - // ensure unique packages - unique, err := ensureUniquePackages(s.db, vulns) - if err != nil { - return err - } +//func (s *vulnerabilityStore) handlePackages(vulns []*Vulnerability) error { +// // ensure unique packages +// unique, err := ensureUniquePackages(s.db, vulns) +// if err != nil { +// return err +// } +// +// // update vulnerabilities with package IDs +// updateAffectedsWithPackages(vulns, unique) +// +// return nil +//} +// +//func ensureUniquePackages(db *gorm.DB, vulns []*Vulnerability) ([]*Package, error) { +// // map to track unique packages +// pkgMap := make(map[string]*Package) +// for _, v := range vulns { +// for _, a := range *v.Affected { +// if a.Packages == nil { +// continue +// } +// pkgs := *a.Packages +// for i, p := range pkgs { +// pkgKey := fmt.Sprintf("%s:%s", strVal(p.Ecosystem), p.PackageName) +// val := &pkgs[i] +// pkgMap[pkgKey] = val +// } +// } +// } +// +// // extract unique packages +// var uniquePackages []*Package +// for _, pkg := range pkgMap { +// uniquePackages = append(uniquePackages, pkg) +// } +// +// // insert unique packages into the database or fetch existing ones +// for i, pkg := range uniquePackages { +// var existing Package +// err := db.Where("package_name = ? AND ecosystem = ?", pkg.Name, pkg.Ecosystem). +// FirstOrCreate(&existing, pkg).Error +// if err != nil { +// return nil, err +// } +// uniquePackages[i].ID = existing.ID +// } +// +// return uniquePackages, nil +// +//} +// +//func updateAffectedsWithPackages(vulns []*Vulnerability, uniquePackages []*Package) { +// pkgMap := make(map[string]int64) +// for _, p := range uniquePackages { +// pkgKey := fmt.Sprintf("%s:%s", strVal(p.Ecosystem), p.Name) +// pkgMap[pkgKey] = p.ID +// } +// +// for i, v := range vulns { +// for j, a := range *v.Affected { +// if a.Packages == nil { +// continue +// } +// pkgs := *a.Packages +// for k, p := range pkgs { +// pkgKey := fmt.Sprintf("%s:%s", strVal(p.Ecosystem), p.PackageName) +// val := pkgMap[pkgKey] +// (*(*vulns[i].Affected)[j].Packages)[k].ID = val +// } +// } +// } +// +//} - // update vulnerabilities with package IDs - updateAffectedsWithPackages(vulns, unique) - - return nil -} - -func ensureUniquePackages(db *gorm.DB, vulns []*Vulnerability) ([]*Package, error) { - // map to track unique packages - pkgMap := make(map[string]*Package) - for _, v := range vulns { - for _, a := range *v.Affected { - if a.Packages == nil { - continue - } - pkgs := *a.Packages - for i, p := range pkgs { - pkgKey := fmt.Sprintf("%s:%s", strVal(p.Ecosystem), p.PackageName) - val := &pkgs[i] - pkgMap[pkgKey] = val - } - } - } - - // extract unique packages - var uniquePackages []*Package - for _, pkg := range pkgMap { - uniquePackages = append(uniquePackages, pkg) - } - - // insert unique packages into the database or fetch existing ones - for i, pkg := range uniquePackages { - var existing Package - err := db.Where("package_name = ? AND ecosystem = ?", pkg.PackageName, pkg.Ecosystem). - FirstOrCreate(&existing, pkg).Error - if err != nil { - return nil, err - } - uniquePackages[i].ID = existing.ID - } - - return uniquePackages, nil - -} - -func updateAffectedsWithPackages(vulns []*Vulnerability, uniquePackages []*Package) { - pkgMap := make(map[string]int64) - for _, p := range uniquePackages { - pkgKey := fmt.Sprintf("%s:%s", strVal(p.Ecosystem), p.PackageName) - pkgMap[pkgKey] = p.ID - } - - for i, v := range vulns { - for j, a := range *v.Affected { - if a.Packages == nil { - continue - } - pkgs := *a.Packages - for k, p := range pkgs { - pkgKey := fmt.Sprintf("%s:%s", strVal(p.Ecosystem), p.PackageName) - val := pkgMap[pkgKey] - (*(*vulns[i].Affected)[j].Packages)[k].ID = val - } - } - } - -} - -func (s *vulnerabilityStore) handleCPEs(vulns []*Vulnerability) error { - // ensure unique cpes - unique, err := ensureUniqueCPEs(s.db, vulns) - if err != nil { - return err - } - - // update vulnerabilities with cpes IDs - updateAffectedsWithCPEs(vulns, unique) - - return nil -} - -func updateAffectedsWithCPEs(vulns []*Vulnerability, uniqueCPEs []*Cpe) { - cpeMap := make(map[string]int64) - for _, c := range uniqueCPEs { - cpeKey := fmt.Sprintf("%s:%s:%s:%s:%s:%s:%s", c.Schema, c.Type, strVal(c.Vendor), c.Product, strVal(c.Version), strVal(c.Update), strVal(c.TargetSoftware)) - cpeMap[cpeKey] = c.ID - } - - for i, v := range vulns { - if v.Affected == nil { - continue - } - for j, a := range *v.Affected { - if a.Cpes == nil { - continue - } - for k, c := range *a.Cpes { - cpeKey := fmt.Sprintf("%s:%s:%s:%s:%s:%s:%s", c.Schema, c.Type, strVal(c.Vendor), c.Product, strVal(c.Version), strVal(c.Update), strVal(c.TargetSoftware)) - val := cpeMap[cpeKey] - (*(*vulns[i].Affected)[j].Cpes)[k].ID = val - } - } - } -} - -func ensureUniqueCPEs(db *gorm.DB, vulns []*Vulnerability) ([]*Cpe, error) { - cpeMap := make(map[string]Cpe) - for _, v := range vulns { - if v.Affected == nil { - continue - } - for _, a := range *v.Affected { - if a.Cpes == nil { - continue - } - for _, c := range *a.Cpes { - cpeKey := fmt.Sprintf("%s:%s:%s:%s:%s:%s:%s", c.Schema, c.Type, strVal(c.Vendor), c.Product, strVal(c.Version), strVal(c.Update), strVal(c.TargetSoftware)) - cpeMap[cpeKey] = c - } - } - } - - // extract unique CPEs - var uniqueCPEs []*Cpe - for i := range cpeMap { - c := cpeMap[i] - uniqueCPEs = append(uniqueCPEs, &c) - } - - // insert unique CPEs into the database or fetch existing ones - for i, c := range uniqueCPEs { - var existing Cpe - err := db.Where("schema = ? AND type = ? AND vendor = ? AND product = ? AND version = ? AND version_update = ? AND target_software = ?", c.Schema, c.Type, c.Vendor, c.Product, c.Version, c.Update, c.TargetSoftware). - FirstOrCreate(&existing, c).Error - if err != nil { - return nil, err - } - uniqueCPEs[i] = &existing - } - - return uniqueCPEs, nil -} +//func (s *vulnerabilityStore) handleCPEs(vulns []*Vulnerability) error { +// // ensure unique cpes +// unique, err := ensureUniqueCPEs(s.db, vulns) +// if err != nil { +// return err +// } +// +// // update vulnerabilities with cpes IDs +// updateAffectedsWithCPEs(vulns, unique) +// +// return nil +//} +// +//func updateAffectedsWithCPEs(vulns []*Vulnerability, uniqueCPEs []*Cpe) { +// cpeMap := make(map[string]int64) +// for _, c := range uniqueCPEs { +// cpeKey := fmt.Sprintf("%s:%s:%s:%s:%s:%s:%s", c.Schema, c.Type, strVal(c.Vendor), c.Product, strVal(c.Version), strVal(c.Update), strVal(c.TargetSoftware)) +// cpeMap[cpeKey] = c.ID +// } +// +// for i, v := range vulns { +// if v.Affected == nil { +// continue +// } +// for j, a := range *v.Affected { +// if a.Cpes == nil { +// continue +// } +// for k, c := range *a.Cpes { +// cpeKey := fmt.Sprintf("%s:%s:%s:%s:%s:%s:%s", c.Schema, c.Type, strVal(c.Vendor), c.Product, strVal(c.Version), strVal(c.Update), strVal(c.TargetSoftware)) +// val := cpeMap[cpeKey] +// (*(*vulns[i].Affected)[j].Cpes)[k].ID = val +// } +// } +// } +//} +// +//func ensureUniqueCPEs(db *gorm.DB, vulns []*Vulnerability) ([]*Cpe, error) { +// cpeMap := make(map[string]Cpe) +// for _, v := range vulns { +// if v.Affected == nil { +// continue +// } +// for _, a := range *v.Affected { +// if a.Cpes == nil { +// continue +// } +// for _, c := range *a.Cpes { +// cpeKey := fmt.Sprintf("%s:%s:%s:%s:%s:%s:%s", c.Schema, c.Type, strVal(c.Vendor), c.Product, strVal(c.Version), strVal(c.Update), strVal(c.TargetSoftware)) +// cpeMap[cpeKey] = c +// } +// } +// } +// +// // extract unique CPEs +// var uniqueCPEs []*Cpe +// for i := range cpeMap { +// c := cpeMap[i] +// uniqueCPEs = append(uniqueCPEs, &c) +// } +// +// // insert unique CPEs into the database or fetch existing ones +// for i, c := range uniqueCPEs { +// var existing Cpe +// err := db.Where("schema = ? AND type = ? AND vendor = ? AND product = ? AND version = ? AND version_update = ? AND target_software = ?", c.Schema, c.Type, c.Vendor, c.Product, c.Version, c.Update, c.TargetSoftware). +// FirstOrCreate(&existing, c).Error +// if err != nil { +// return nil, err +// } +// uniqueCPEs[i] = &existing +// } +// +// return uniqueCPEs, nil +//} func (s *vulnerabilityStore) handleRangeEvents(vulns []*Vulnerability) error { // ensure unique operating systems