denormalize affected fields

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
This commit is contained in:
Alex Goodman 2024-06-21 17:34:52 -04:00
parent 87292dc353
commit 536bd2f6af
4 changed files with 227 additions and 192 deletions

View file

@ -56,8 +56,8 @@ func populateFixture1(store v6.Store) error {
Affected: &[]v6.Affected{ Affected: &[]v6.Affected{
{ {
Package: &v6.Package{ Package: &v6.Package{
Ecosystem: "python", Ecosystem: "python",
PackageName: "example-python-package1", Name: "example-python-package1",
Purls: &[]v6.Purl{ Purls: &[]v6.Purl{
{ {
Scheme: "pkg", Scheme: "pkg",
@ -92,8 +92,8 @@ func populateFixture1(store v6.Store) error {
Affected: &[]v6.Affected{ Affected: &[]v6.Affected{
{ {
Package: &v6.Package{ Package: &v6.Package{
Ecosystem: "golang", Ecosystem: "golang",
PackageName: "example-golang-package1", Name: "example-golang-package1",
Purls: &[]v6.Purl{ Purls: &[]v6.Purl{
{ {
Scheme: "pkg", Scheme: "pkg",
@ -133,8 +133,8 @@ func populateFixture1(store v6.Store) error {
Affected: &[]v6.Affected{ Affected: &[]v6.Affected{
{ {
Package: &v6.Package{ Package: &v6.Package{
Ecosystem: "python", Ecosystem: "python",
PackageName: "example-python-package2", Name: "example-python-package2",
Purls: &[]v6.Purl{ Purls: &[]v6.Purl{
{ {
Scheme: "pkg", Scheme: "pkg",
@ -177,8 +177,8 @@ func populateFixture1(store v6.Store) error {
Affected: &[]v6.Affected{ Affected: &[]v6.Affected{
{ {
Package: &v6.Package{ Package: &v6.Package{
Ecosystem: "python", Ecosystem: "python",
PackageName: fmt.Sprintf("example-python-package%d", i+3), Name: fmt.Sprintf("example-python-package%d", i+3),
Purls: &[]v6.Purl{ Purls: &[]v6.Purl{
{ {
Scheme: "pkg", Scheme: "pkg",

View file

@ -188,7 +188,7 @@ func (v affectedPackageStringer) String() string {
if v.Package == nil { if v.Package == nil {
return "Package=<nil>" return "Package=<nil>"
} }
return fmt.Sprintf("Package[%d]{Ecosystem=%q, PackageName=%q, %v}", v.ID, v.Ecosystem, v.PackageName, operatingSystemStringer{v.OperatingSystem}) return fmt.Sprintf("Package[%d]{Ecosystem=%q, Name=%q, %v}", v.ID, v.Ecosystem, v.Name, operatingSystemStringer{v.OperatingSystem})
} }
type operatingSystemStringer struct { type operatingSystemStringer struct {

View file

@ -1,6 +1,7 @@
package v6 package v6
import ( import (
"fmt"
"gorm.io/datatypes" "gorm.io/datatypes"
"gorm.io/gorm" "gorm.io/gorm"
"time" "time"
@ -85,6 +86,46 @@ type Vulnerability struct {
// Affected is a list of affected entries related to this vulnerability // Affected is a list of affected entries related to this vulnerability
Affected *[]Affected `gorm:"foreignKey:VulnerabilityID"` Affected *[]Affected `gorm:"foreignKey:VulnerabilityID"`
affected *[]Affected `gorm:"-"`
}
// BeforeCreate sets aside an oversized Affected list so that it can be
// inserted separately in batches by AfterCreate and then re-attached to the
// Vulnerability.
//
// When Affected holds more than 500 entries the pointer is moved to the
// unexported affected field and Affected is cleared; otherwise the receiver is
// left untouched. It always returns nil and does not use tx.
//
// NOTE(review): the signature matches GORM's BeforeCreate hook, so this is
// presumably invoked by GORM ahead of the insert -- confirm against the caller.
func (c *Vulnerability) BeforeCreate(tx *gorm.DB) error {
	// largest Affected list that is left in place on the Vulnerability
	const inlineLimit = 500

	if c.Affected == nil || len(*c.Affected) <= inlineLimit {
		return nil
	}

	// stash the list and detach it from the exported field
	c.affected, c.Affected = c.Affected, nil
	return nil
}
func (c *Vulnerability) AfterCreate(tx *gorm.DB) error {
if c.affected == nil {
return nil
}
// create in batches...
var affecteds []*Affected
affs := *c.affected
for i := range affs {
a := affs[i]
a.VulnerabilityID = c.ID
affecteds = append(affecteds, &a)
}
if err := tx.CreateInBatches(affecteds, 500).Error; err != nil {
return fmt.Errorf("failed to create affecteds in batches: %w", err)
}
affs = make([]Affected, len(affecteds))
for i := range affecteds {
affs[i] = *affecteds[i]
}
c.Affected = &affs
c.affected = nil
return nil
} }
// TODO: can I do this? // TODO: can I do this?
@ -192,10 +233,10 @@ type Affected struct {
Severities *datatypes.JSONSlice[AffectedSeverity] `gorm:"column:severities"` Severities *datatypes.JSONSlice[AffectedSeverity] `gorm:"column:severities"`
PackageQualifier *datatypes.JSON `gorm:"column:package_qualifier"` PackageQualifier *datatypes.JSON `gorm:"column:package_qualifier"`
Range *[]Range `gorm:"foreignKey:AffectedID"` Range *[]Range `gorm:"foreignKey:AffectedID"`
Packages *[]Package `gorm:"many2many:affected_packages"` Package *Package `gorm:"embedded;embeddedPrefix:package_"`
Digests *[]Digest `gorm:"many2many:affected_digests"` Digest *Digest `gorm:"embedded;embeddedPrefix:digest_"`
Cpes *[]Cpe `gorm:"many2many:affected_cpes"` Cpes *datatypes.JSONSlice[Cpe] `gorm:"column:cpes"`
} }
// TODO: add later and reuse existing similar tables with many2many // TODO: add later and reuse existing similar tables with many2many
@ -291,8 +332,6 @@ func (re *RangeEvent) BeforeCreate(tx *gorm.DB) (err error) {
type Cpe struct { type Cpe struct {
// TODO: what about different CPE versions? // TODO: what about different CPE versions?
ID int64 `gorm:"column:id;primaryKey"`
Schema string `gorm:"column:schema;not null;index:idx_cpe"` // effectively the CPE version Schema string `gorm:"column:schema;not null;index:idx_cpe"` // effectively the CPE version
Type string `gorm:"column:type;not null;index:idx_cpe"` Type string `gorm:"column:type;not null;index:idx_cpe"`
Vendor *string `gorm:"column:vendor;index:idx_cpe"` Vendor *string `gorm:"column:vendor;index:idx_cpe"`
@ -304,33 +343,29 @@ type Cpe struct {
// TODO: should we also have the remaining CPE fields here? // TODO: should we also have the remaining CPE fields here?
} }
func (c *Cpe) BeforeCreate(tx *gorm.DB) (err error) { //func (c *Cpe) BeforeCreate(tx *gorm.DB) (err error) {
// if the name, major version, and minor version already exist in the table then we should not insert a new record // // if the name, major version, and minor version already exist in the table then we should not insert a new record
var existing Cpe // var existing Cpe
result := tx.Where("schema = ? AND type = ? AND vendor = ? AND product = ? AND version = ? AND version_update = ? AND target_software = ?", c.Schema, c.Type, c.Vendor, c.Product, c.Version, c.Update, c.TargetSoftware).First(&existing) // result := tx.Where("schema = ? AND type = ? AND vendor = ? AND product = ? AND version = ? AND version_update = ? AND target_software = ?", c.Schema, c.Type, c.Vendor, c.Product, c.Version, c.Update, c.TargetSoftware).First(&existing)
if result.Error == nil { // if result.Error == nil {
// if the record already exists, then we should use the existing record // // if the record already exists, then we should use the existing record
*c = existing // *c = existing
} // }
return nil // return nil
} //}
// Digest represents arbitrary digests that can be associated with a vulnerability such that if found the material can be considered to be affected by this vulnerability // Digest represents arbitrary digests that can be associated with a vulnerability such that if found the material can be considered to be affected by this vulnerability
type Digest struct { type Digest struct {
ID int64 `gorm:"column:id;primaryKey"` Algorithm string `gorm:"column:algorithm"`
Value string `gorm:"column:value"`
Algorithm string `gorm:"column:algorithm;not null"`
Value string `gorm:"column:value;not null"`
} }
type Package struct { type Package struct {
// TODO: setup unique indexes only for writing and drop before shipping for the best size tradeoff // TODO: setup unique indexes only for writing and drop before shipping for the best size tradeoff
ID int64 `gorm:"column:id;primaryKey"`
// TODO: break purl out into fields here // TODO: break purl out into fields here
Ecosystem *string `gorm:"column:ecosystem;index:idx_package,unique"` // TODO: NVD doesn't have this, should this be nullable? Ecosystem *string `gorm:"column:ecosystem;index:idx_package"` // TODO: NVD doesn't have this, should this be nullable?
PackageName string `gorm:"column:package_name;index:idx_package,unique"` Name string `gorm:"column:name;index:idx_package"`
//OperatingSystemID *int64 `gorm:"column:operating_system_id"` //OperatingSystemID *int64 `gorm:"column:operating_system_id"`
//OperatingSystem *OperatingSystem `gorm:"foreignKey:OperatingSystemID"` //OperatingSystem *OperatingSystem `gorm:"foreignKey:OperatingSystemID"`
@ -340,16 +375,16 @@ type Package struct {
} }
func (c *Package) BeforeCreate(tx *gorm.DB) (err error) { //func (c *Package) BeforeCreate(tx *gorm.DB) (err error) {
// if the name, major version, and minor version already exist in the table then we should not insert a new record // // if the name, major version, and minor version already exist in the table then we should not insert a new record
var existing Package // var existing Package
result := tx.Where("package_name = ? AND ecosystem = ?", c.PackageName, c.Ecosystem).First(&existing) // result := tx.Where("name = ? AND ecosystem = ?", c.Name, c.Ecosystem).First(&existing)
if result.Error == nil { // if result.Error == nil {
// if the record already exists, then we should use the existing record // // if the record already exists, then we should use the existing record
*c = existing // *c = existing
} // }
return nil // return nil
} //}
//type Purl struct { //type Purl struct {
// ID int64 `gorm:"column:id;primaryKey"` // ID int64 `gorm:"column:id;primaryKey"`

View file

@ -44,8 +44,8 @@ func (s *vulnerabilityStore) AddVulnerabilities(vulnerabilities ...*Vulnerabilit
for _, h := range []func([]*Vulnerability) error{ for _, h := range []func([]*Vulnerability) error{
s.handleOSs, s.handleOSs,
s.handleRangeEvents, s.handleRangeEvents,
s.handleCPEs, //s.handleCPEs,
s.handlePackages, //s.handlePackages,
} { } {
if err := h(vulnerabilities); err != nil { if err := h(vulnerabilities); err != nil {
return err return err
@ -56,154 +56,154 @@ func (s *vulnerabilityStore) AddVulnerabilities(vulnerabilities ...*Vulnerabilit
return s.db.CreateInBatches(vulnerabilities, s.BatchSize).Error return s.db.CreateInBatches(vulnerabilities, s.BatchSize).Error
} }
func (s *vulnerabilityStore) handlePackages(vulns []*Vulnerability) error { //func (s *vulnerabilityStore) handlePackages(vulns []*Vulnerability) error {
// ensure unique packages // // ensure unique packages
unique, err := ensureUniquePackages(s.db, vulns) // unique, err := ensureUniquePackages(s.db, vulns)
if err != nil { // if err != nil {
return err // return err
} // }
//
// // update vulnerabilities with package IDs
// updateAffectedsWithPackages(vulns, unique)
//
// return nil
//}
//
//func ensureUniquePackages(db *gorm.DB, vulns []*Vulnerability) ([]*Package, error) {
// // map to track unique packages
// pkgMap := make(map[string]*Package)
// for _, v := range vulns {
// for _, a := range *v.Affected {
// if a.Packages == nil {
// continue
// }
// pkgs := *a.Packages
// for i, p := range pkgs {
// pkgKey := fmt.Sprintf("%s:%s", strVal(p.Ecosystem), p.PackageName)
// val := &pkgs[i]
// pkgMap[pkgKey] = val
// }
// }
// }
//
// // extract unique packages
// var uniquePackages []*Package
// for _, pkg := range pkgMap {
// uniquePackages = append(uniquePackages, pkg)
// }
//
// // insert unique packages into the database or fetch existing ones
// for i, pkg := range uniquePackages {
// var existing Package
// err := db.Where("package_name = ? AND ecosystem = ?", pkg.Name, pkg.Ecosystem).
// FirstOrCreate(&existing, pkg).Error
// if err != nil {
// return nil, err
// }
// uniquePackages[i].ID = existing.ID
// }
//
// return uniquePackages, nil
//
//}
//
//func updateAffectedsWithPackages(vulns []*Vulnerability, uniquePackages []*Package) {
// pkgMap := make(map[string]int64)
// for _, p := range uniquePackages {
// pkgKey := fmt.Sprintf("%s:%s", strVal(p.Ecosystem), p.Name)
// pkgMap[pkgKey] = p.ID
// }
//
// for i, v := range vulns {
// for j, a := range *v.Affected {
// if a.Packages == nil {
// continue
// }
// pkgs := *a.Packages
// for k, p := range pkgs {
// pkgKey := fmt.Sprintf("%s:%s", strVal(p.Ecosystem), p.PackageName)
// val := pkgMap[pkgKey]
// (*(*vulns[i].Affected)[j].Packages)[k].ID = val
// }
// }
// }
//
//}
// update vulnerabilities with package IDs //func (s *vulnerabilityStore) handleCPEs(vulns []*Vulnerability) error {
updateAffectedsWithPackages(vulns, unique) // // ensure unique cpes
// unique, err := ensureUniqueCPEs(s.db, vulns)
return nil // if err != nil {
} // return err
// }
func ensureUniquePackages(db *gorm.DB, vulns []*Vulnerability) ([]*Package, error) { //
// map to track unique packages // // update vulnerabilities with cpes IDs
pkgMap := make(map[string]*Package) // updateAffectedsWithCPEs(vulns, unique)
for _, v := range vulns { //
for _, a := range *v.Affected { // return nil
if a.Packages == nil { //}
continue //
} //func updateAffectedsWithCPEs(vulns []*Vulnerability, uniqueCPEs []*Cpe) {
pkgs := *a.Packages // cpeMap := make(map[string]int64)
for i, p := range pkgs { // for _, c := range uniqueCPEs {
pkgKey := fmt.Sprintf("%s:%s", strVal(p.Ecosystem), p.PackageName) // cpeKey := fmt.Sprintf("%s:%s:%s:%s:%s:%s:%s", c.Schema, c.Type, strVal(c.Vendor), c.Product, strVal(c.Version), strVal(c.Update), strVal(c.TargetSoftware))
val := &pkgs[i] // cpeMap[cpeKey] = c.ID
pkgMap[pkgKey] = val // }
} //
} // for i, v := range vulns {
} // if v.Affected == nil {
// continue
// extract unique packages // }
var uniquePackages []*Package // for j, a := range *v.Affected {
for _, pkg := range pkgMap { // if a.Cpes == nil {
uniquePackages = append(uniquePackages, pkg) // continue
} // }
// for k, c := range *a.Cpes {
// insert unique packages into the database or fetch existing ones // cpeKey := fmt.Sprintf("%s:%s:%s:%s:%s:%s:%s", c.Schema, c.Type, strVal(c.Vendor), c.Product, strVal(c.Version), strVal(c.Update), strVal(c.TargetSoftware))
for i, pkg := range uniquePackages { // val := cpeMap[cpeKey]
var existing Package // (*(*vulns[i].Affected)[j].Cpes)[k].ID = val
err := db.Where("package_name = ? AND ecosystem = ?", pkg.PackageName, pkg.Ecosystem). // }
FirstOrCreate(&existing, pkg).Error // }
if err != nil { // }
return nil, err //}
} //
uniquePackages[i].ID = existing.ID //func ensureUniqueCPEs(db *gorm.DB, vulns []*Vulnerability) ([]*Cpe, error) {
} // cpeMap := make(map[string]Cpe)
// for _, v := range vulns {
return uniquePackages, nil // if v.Affected == nil {
// continue
} // }
// for _, a := range *v.Affected {
func updateAffectedsWithPackages(vulns []*Vulnerability, uniquePackages []*Package) { // if a.Cpes == nil {
pkgMap := make(map[string]int64) // continue
for _, p := range uniquePackages { // }
pkgKey := fmt.Sprintf("%s:%s", strVal(p.Ecosystem), p.PackageName) // for _, c := range *a.Cpes {
pkgMap[pkgKey] = p.ID // cpeKey := fmt.Sprintf("%s:%s:%s:%s:%s:%s:%s", c.Schema, c.Type, strVal(c.Vendor), c.Product, strVal(c.Version), strVal(c.Update), strVal(c.TargetSoftware))
} // cpeMap[cpeKey] = c
// }
for i, v := range vulns { // }
for j, a := range *v.Affected { // }
if a.Packages == nil { //
continue // // extract unique CPEs
} // var uniqueCPEs []*Cpe
pkgs := *a.Packages // for i := range cpeMap {
for k, p := range pkgs { // c := cpeMap[i]
pkgKey := fmt.Sprintf("%s:%s", strVal(p.Ecosystem), p.PackageName) // uniqueCPEs = append(uniqueCPEs, &c)
val := pkgMap[pkgKey] // }
(*(*vulns[i].Affected)[j].Packages)[k].ID = val //
} // // insert unique CPEs into the database or fetch existing ones
} // for i, c := range uniqueCPEs {
} // var existing Cpe
// err := db.Where("schema = ? AND type = ? AND vendor = ? AND product = ? AND version = ? AND version_update = ? AND target_software = ?", c.Schema, c.Type, c.Vendor, c.Product, c.Version, c.Update, c.TargetSoftware).
} // FirstOrCreate(&existing, c).Error
// if err != nil {
func (s *vulnerabilityStore) handleCPEs(vulns []*Vulnerability) error { // return nil, err
// ensure unique cpes // }
unique, err := ensureUniqueCPEs(s.db, vulns) // uniqueCPEs[i] = &existing
if err != nil { // }
return err //
} // return uniqueCPEs, nil
//}
// update vulnerabilities with cpes IDs
updateAffectedsWithCPEs(vulns, unique)
return nil
}
func updateAffectedsWithCPEs(vulns []*Vulnerability, uniqueCPEs []*Cpe) {
cpeMap := make(map[string]int64)
for _, c := range uniqueCPEs {
cpeKey := fmt.Sprintf("%s:%s:%s:%s:%s:%s:%s", c.Schema, c.Type, strVal(c.Vendor), c.Product, strVal(c.Version), strVal(c.Update), strVal(c.TargetSoftware))
cpeMap[cpeKey] = c.ID
}
for i, v := range vulns {
if v.Affected == nil {
continue
}
for j, a := range *v.Affected {
if a.Cpes == nil {
continue
}
for k, c := range *a.Cpes {
cpeKey := fmt.Sprintf("%s:%s:%s:%s:%s:%s:%s", c.Schema, c.Type, strVal(c.Vendor), c.Product, strVal(c.Version), strVal(c.Update), strVal(c.TargetSoftware))
val := cpeMap[cpeKey]
(*(*vulns[i].Affected)[j].Cpes)[k].ID = val
}
}
}
}
func ensureUniqueCPEs(db *gorm.DB, vulns []*Vulnerability) ([]*Cpe, error) {
cpeMap := make(map[string]Cpe)
for _, v := range vulns {
if v.Affected == nil {
continue
}
for _, a := range *v.Affected {
if a.Cpes == nil {
continue
}
for _, c := range *a.Cpes {
cpeKey := fmt.Sprintf("%s:%s:%s:%s:%s:%s:%s", c.Schema, c.Type, strVal(c.Vendor), c.Product, strVal(c.Version), strVal(c.Update), strVal(c.TargetSoftware))
cpeMap[cpeKey] = c
}
}
}
// extract unique CPEs
var uniqueCPEs []*Cpe
for i := range cpeMap {
c := cpeMap[i]
uniqueCPEs = append(uniqueCPEs, &c)
}
// insert unique CPEs into the database or fetch existing ones
for i, c := range uniqueCPEs {
var existing Cpe
err := db.Where("schema = ? AND type = ? AND vendor = ? AND product = ? AND version = ? AND version_update = ? AND target_software = ?", c.Schema, c.Type, c.Vendor, c.Product, c.Version, c.Update, c.TargetSoftware).
FirstOrCreate(&existing, c).Error
if err != nil {
return nil, err
}
uniqueCPEs[i] = &existing
}
return uniqueCPEs, nil
}
func (s *vulnerabilityStore) handleRangeEvents(vulns []*Vulnerability) error { func (s *vulnerabilityStore) handleRangeEvents(vulns []*Vulnerability) error {
// ensure unique operating systems // ensure unique operating systems