denormalize affected fields

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
This commit is contained in:
Alex Goodman 2024-06-21 17:34:52 -04:00
parent 87292dc353
commit 536bd2f6af
4 changed files with 227 additions and 192 deletions

View file

@ -56,8 +56,8 @@ func populateFixture1(store v6.Store) error {
Affected: &[]v6.Affected{
{
Package: &v6.Package{
Ecosystem: "python",
PackageName: "example-python-package1",
Ecosystem: "python",
Name: "example-python-package1",
Purls: &[]v6.Purl{
{
Scheme: "pkg",
@ -92,8 +92,8 @@ func populateFixture1(store v6.Store) error {
Affected: &[]v6.Affected{
{
Package: &v6.Package{
Ecosystem: "golang",
PackageName: "example-golang-package1",
Ecosystem: "golang",
Name: "example-golang-package1",
Purls: &[]v6.Purl{
{
Scheme: "pkg",
@ -133,8 +133,8 @@ func populateFixture1(store v6.Store) error {
Affected: &[]v6.Affected{
{
Package: &v6.Package{
Ecosystem: "python",
PackageName: "example-python-package2",
Ecosystem: "python",
Name: "example-python-package2",
Purls: &[]v6.Purl{
{
Scheme: "pkg",
@ -177,8 +177,8 @@ func populateFixture1(store v6.Store) error {
Affected: &[]v6.Affected{
{
Package: &v6.Package{
Ecosystem: "python",
PackageName: fmt.Sprintf("example-python-package%d", i+3),
Ecosystem: "python",
Name: fmt.Sprintf("example-python-package%d", i+3),
Purls: &[]v6.Purl{
{
Scheme: "pkg",

View file

@ -188,7 +188,7 @@ func (v affectedPackageStringer) String() string {
if v.Package == nil {
return "Package=<nil>"
}
return fmt.Sprintf("Package[%d]{Ecosystem=%q, PackageName=%q, %v}", v.ID, v.Ecosystem, v.PackageName, operatingSystemStringer{v.OperatingSystem})
return fmt.Sprintf("Package[%d]{Ecosystem=%q, Name=%q, %v}", v.ID, v.Ecosystem, v.Name, operatingSystemStringer{v.OperatingSystem})
}
type operatingSystemStringer struct {

View file

@ -1,6 +1,7 @@
package v6
import (
"fmt"
"gorm.io/datatypes"
"gorm.io/gorm"
"time"
@ -85,6 +86,46 @@ type Vulnerability struct {
// Affected is a list of affected entries related to this vulnerability
Affected *[]Affected `gorm:"foreignKey:VulnerabilityID"`
affected *[]Affected `gorm:"-"`
}
// BeforeCreate is the GORM hook that runs before a Vulnerability row is created.
//
// When the vulnerability carries more than 500 Affected entries, the entries are
// moved to the unexported affected field and Affected is set to nil. AfterCreate
// later writes the stashed entries in batches. Smaller (or nil) Affected lists
// are left untouched. The hook never returns an error.
func (c *Vulnerability) BeforeCreate(tx *gorm.DB) error {
	if c.Affected == nil || len(*c.Affected) <= 500 {
		// nothing to defer: leave the entries attached to the vulnerability
		return nil
	}

	// detach the large list so it can be written separately in AfterCreate
	c.affected, c.Affected = c.Affected, nil
	return nil
}
// AfterCreate is the GORM hook that runs after a Vulnerability row is created.
//
// If BeforeCreate stashed a large Affected list in the unexported affected
// field, each entry is copied, pointed at the new vulnerability ID, and written
// with CreateInBatches (batch size 500, the same value BeforeCreate uses as its
// threshold). On success the created entries are re-attached to c.Affected and
// the stash is cleared. When nothing was stashed the hook is a no-op.
//
// An error is returned only when the batched create fails; in that case
// c.Affected stays nil and the stash is left in place.
func (c *Vulnerability) AfterCreate(tx *gorm.DB) error {
	if c.affected == nil {
		return nil
	}

	// create in batches...
	stashed := *c.affected
	// the final length is known, so size the pointer slice once up front
	affecteds := make([]*Affected, 0, len(stashed))
	for i := range stashed {
		// work on a copy so the stashed slice itself is not modified
		a := stashed[i]
		a.VulnerabilityID = c.ID
		affecteds = append(affecteds, &a)
	}

	if err := tx.CreateInBatches(affecteds, 500).Error; err != nil {
		return fmt.Errorf("failed to create affecteds in batches: %w", err)
	}

	// flatten the created records back into a value slice for the public field
	created := make([]Affected, len(affecteds))
	for i, a := range affecteds {
		created[i] = *a
	}

	c.Affected = &created
	c.affected = nil
	return nil
}
// TODO: can I do this?
@ -192,10 +233,10 @@ type Affected struct {
Severities *datatypes.JSONSlice[AffectedSeverity] `gorm:"column:severities"`
PackageQualifier *datatypes.JSON `gorm:"column:package_qualifier"`
Range *[]Range `gorm:"foreignKey:AffectedID"`
Packages *[]Package `gorm:"many2many:affected_packages"`
Digests *[]Digest `gorm:"many2many:affected_digests"`
Cpes *[]Cpe `gorm:"many2many:affected_cpes"`
Range *[]Range `gorm:"foreignKey:AffectedID"`
Package *Package `gorm:"embedded;embeddedPrefix:package_"`
Digest *Digest `gorm:"embedded;embeddedPrefix:digest_"`
Cpes *datatypes.JSONSlice[Cpe] `gorm:"column:cpes"`
}
// TODO: add later and reuse existing similar tables with many2many
@ -291,8 +332,6 @@ func (re *RangeEvent) BeforeCreate(tx *gorm.DB) (err error) {
type Cpe struct {
// TODO: what about different CPE versions?
ID int64 `gorm:"column:id;primaryKey"`
Schema string `gorm:"column:schema;not null;index:idx_cpe"` // effectively the CPE version
Type string `gorm:"column:type;not null;index:idx_cpe"`
Vendor *string `gorm:"column:vendor;index:idx_cpe"`
@ -304,33 +343,29 @@ type Cpe struct {
// TODO: should we also have the remaining CPE fields here?
}
// BeforeCreate is the GORM hook that runs before a Cpe row is created.
//
// It looks for a stored row whose schema, type, vendor, product, version,
// version_update, and target_software columns equal the corresponding fields
// of c. When the lookup succeeds, *c is overwritten with the stored row.
// Any lookup error is discarded and nil is always returned.
//
// NOTE(review): nil pointer fields (e.g. Vendor) presumably bind as SQL NULL,
// and "col = NULL" does not match rows in SQL; confirm such CPEs can be found.
func (c *Cpe) BeforeCreate(tx *gorm.DB) (err error) {
	var found Cpe
	lookup := tx.Where("schema = ? AND type = ? AND vendor = ? AND product = ? AND version = ? AND version_update = ? AND target_software = ?", c.Schema, c.Type, c.Vendor, c.Product, c.Version, c.Update, c.TargetSoftware).First(&found)
	if lookup.Error != nil {
		// best effort: any failure is treated as "no existing record"
		return nil
	}

	// reuse the stored record in place of the one being created
	*c = found
	return nil
}
//func (c *Cpe) BeforeCreate(tx *gorm.DB) (err error) {
// // if a CPE with the same schema, type, vendor, product, version, update, and target software already exists in the table then we should not insert a new record
// var existing Cpe
// result := tx.Where("schema = ? AND type = ? AND vendor = ? AND product = ? AND version = ? AND version_update = ? AND target_software = ?", c.Schema, c.Type, c.Vendor, c.Product, c.Version, c.Update, c.TargetSoftware).First(&existing)
// if result.Error == nil {
// // if the record already exists, then we should use the existing record
// *c = existing
// }
// return nil
//}
// Digest represents arbitrary digests that can be associated with a vulnerability such that if found the material can be considered to be affected by this vulnerability
type Digest struct {
ID int64 `gorm:"column:id;primaryKey"`
Algorithm string `gorm:"column:algorithm;not null"`
Value string `gorm:"column:value;not null"`
Algorithm string `gorm:"column:algorithm"`
Value string `gorm:"column:value"`
}
type Package struct {
// TODO: setup unique indexes only for writing and drop before shipping for the best size tradeoff
ID int64 `gorm:"column:id;primaryKey"`
// TODO: break purl out into fields here
Ecosystem *string `gorm:"column:ecosystem;index:idx_package,unique"` // TODO: NVD doesn't have this, should this be nullable?
PackageName string `gorm:"column:package_name;index:idx_package,unique"`
Ecosystem *string `gorm:"column:ecosystem;index:idx_package"` // TODO: NVD doesn't have this, should this be nullable?
Name string `gorm:"column:name;index:idx_package"`
//OperatingSystemID *int64 `gorm:"column:operating_system_id"`
//OperatingSystem *OperatingSystem `gorm:"foreignKey:OperatingSystemID"`
@ -340,16 +375,16 @@ type Package struct {
}
// BeforeCreate is the GORM hook that runs before a Package row is created.
//
// It looks for a stored row with the same package_name and ecosystem as c.
// When the lookup succeeds, *c is overwritten with the stored row. Any lookup
// error is discarded and nil is always returned.
//
// NOTE(review): a nil Ecosystem presumably binds as SQL NULL, and
// "ecosystem = NULL" does not match rows in SQL; confirm such packages can be found.
func (c *Package) BeforeCreate(tx *gorm.DB) (err error) {
	var found Package
	lookup := tx.Where("package_name = ? AND ecosystem = ?", c.PackageName, c.Ecosystem).First(&found)
	if lookup.Error != nil {
		// best effort: any failure is treated as "no existing record"
		return nil
	}

	// reuse the stored record in place of the one being created
	*c = found
	return nil
}
//func (c *Package) BeforeCreate(tx *gorm.DB) (err error) {
// // if a package with the same name and ecosystem already exists in the table then we should not insert a new record
// var existing Package
// result := tx.Where("name = ? AND ecosystem = ?", c.Name, c.Ecosystem).First(&existing)
// if result.Error == nil {
// // if the record already exists, then we should use the existing record
// *c = existing
// }
// return nil
//}
//type Purl struct {
// ID int64 `gorm:"column:id;primaryKey"`

View file

@ -44,8 +44,8 @@ func (s *vulnerabilityStore) AddVulnerabilities(vulnerabilities ...*Vulnerabilit
for _, h := range []func([]*Vulnerability) error{
s.handleOSs,
s.handleRangeEvents,
s.handleCPEs,
s.handlePackages,
//s.handleCPEs,
//s.handlePackages,
} {
if err := h(vulnerabilities); err != nil {
return err
@ -56,154 +56,154 @@ func (s *vulnerabilityStore) AddVulnerabilities(vulnerabilities ...*Vulnerabilit
return s.db.CreateInBatches(vulnerabilities, s.BatchSize).Error
}
func (s *vulnerabilityStore) handlePackages(vulns []*Vulnerability) error {
// ensure unique packages
unique, err := ensureUniquePackages(s.db, vulns)
if err != nil {
return err
}
//func (s *vulnerabilityStore) handlePackages(vulns []*Vulnerability) error {
// // ensure unique packages
// unique, err := ensureUniquePackages(s.db, vulns)
// if err != nil {
// return err
// }
//
// // update vulnerabilities with package IDs
// updateAffectedsWithPackages(vulns, unique)
//
// return nil
//}
//
//func ensureUniquePackages(db *gorm.DB, vulns []*Vulnerability) ([]*Package, error) {
// // map to track unique packages
// pkgMap := make(map[string]*Package)
// for _, v := range vulns {
// for _, a := range *v.Affected {
// if a.Packages == nil {
// continue
// }
// pkgs := *a.Packages
// for i, p := range pkgs {
// pkgKey := fmt.Sprintf("%s:%s", strVal(p.Ecosystem), p.PackageName)
// val := &pkgs[i]
// pkgMap[pkgKey] = val
// }
// }
// }
//
// // extract unique packages
// var uniquePackages []*Package
// for _, pkg := range pkgMap {
// uniquePackages = append(uniquePackages, pkg)
// }
//
// // insert unique packages into the database or fetch existing ones
// for i, pkg := range uniquePackages {
// var existing Package
// err := db.Where("package_name = ? AND ecosystem = ?", pkg.Name, pkg.Ecosystem).
// FirstOrCreate(&existing, pkg).Error
// if err != nil {
// return nil, err
// }
// uniquePackages[i].ID = existing.ID
// }
//
// return uniquePackages, nil
//
//}
//
//func updateAffectedsWithPackages(vulns []*Vulnerability, uniquePackages []*Package) {
// pkgMap := make(map[string]int64)
// for _, p := range uniquePackages {
// pkgKey := fmt.Sprintf("%s:%s", strVal(p.Ecosystem), p.Name)
// pkgMap[pkgKey] = p.ID
// }
//
// for i, v := range vulns {
// for j, a := range *v.Affected {
// if a.Packages == nil {
// continue
// }
// pkgs := *a.Packages
// for k, p := range pkgs {
// pkgKey := fmt.Sprintf("%s:%s", strVal(p.Ecosystem), p.PackageName)
// val := pkgMap[pkgKey]
// (*(*vulns[i].Affected)[j].Packages)[k].ID = val
// }
// }
// }
//
//}
// update vulnerabilities with package IDs
updateAffectedsWithPackages(vulns, unique)
return nil
}
// ensureUniquePackages collects the distinct packages referenced by the
// affected entries of vulns, makes sure each one has a row in the database,
// and returns them with their database IDs filled in.
//
// Packages are keyed by "<ecosystem>:<package name>"; when several entries
// share a key, the last one seen is kept. The returned pointers alias elements
// of the callers' Packages slices, so setting ID here is visible through the
// kept entry. The order of the returned slice follows map iteration and is
// therefore not deterministic.
//
// Vulnerabilities without affected entries and affected entries without
// packages are skipped. The first database error aborts the operation and is
// returned with a nil slice.
func ensureUniquePackages(db *gorm.DB, vulns []*Vulnerability) ([]*Package, error) {
	// map to track unique packages
	pkgMap := make(map[string]*Package)
	for _, v := range vulns {
		// guard against vulnerabilities with no affected entries (matches the
		// CPE handling); dereferencing a nil Affected would panic
		if v.Affected == nil {
			continue
		}
		for _, a := range *v.Affected {
			if a.Packages == nil {
				continue
			}
			pkgs := *a.Packages
			for i := range pkgs {
				pkgKey := fmt.Sprintf("%s:%s", strVal(pkgs[i].Ecosystem), pkgs[i].PackageName)
				// keep a pointer to the slice element so ID updates reach the caller's data
				pkgMap[pkgKey] = &pkgs[i]
			}
		}
	}

	// extract unique packages
	var uniquePackages []*Package
	for _, pkg := range pkgMap {
		uniquePackages = append(uniquePackages, pkg)
	}

	// insert unique packages into the database or fetch existing ones
	for _, pkg := range uniquePackages {
		var existing Package
		err := db.Where("package_name = ? AND ecosystem = ?", pkg.PackageName, pkg.Ecosystem).
			FirstOrCreate(&existing, pkg).Error
		if err != nil {
			return nil, err
		}
		pkg.ID = existing.ID
	}

	return uniquePackages, nil
}
// updateAffectedsWithPackages writes database IDs onto every package
// referenced by the affected entries of vulns.
//
// uniquePackages supplies the ID for each "<ecosystem>:<package name>" key.
// A package whose key is not present in uniquePackages gets ID 0 (the map's
// zero value). Vulnerabilities without affected entries and affected entries
// without packages are skipped.
func updateAffectedsWithPackages(vulns []*Vulnerability, uniquePackages []*Package) {
	pkgMap := make(map[string]int64, len(uniquePackages))
	for _, p := range uniquePackages {
		pkgKey := fmt.Sprintf("%s:%s", strVal(p.Ecosystem), p.PackageName)
		pkgMap[pkgKey] = p.ID
	}

	for _, v := range vulns {
		// guard against vulnerabilities with no affected entries (matches the
		// CPE handling); dereferencing a nil Affected would panic
		if v.Affected == nil {
			continue
		}
		for _, a := range *v.Affected {
			if a.Packages == nil {
				continue
			}
			// a is a copy, but a.Packages still points at the shared slice, so
			// writing through pkgs updates the vulnerability's own data
			pkgs := *a.Packages
			for k := range pkgs {
				pkgKey := fmt.Sprintf("%s:%s", strVal(pkgs[k].Ecosystem), pkgs[k].PackageName)
				pkgs[k].ID = pkgMap[pkgKey]
			}
		}
	}
}
// handleCPEs resolves the distinct CPEs referenced by vulns against the
// database via ensureUniqueCPEs and then stamps the resulting IDs onto the
// vulnerabilities' affected entries. If the resolution step fails, its error
// is returned and the vulnerabilities are left unmodified by this method.
func (s *vulnerabilityStore) handleCPEs(vulns []*Vulnerability) error {
	resolved, err := ensureUniqueCPEs(s.db, vulns)
	if err == nil {
		// only propagate IDs once every CPE has been resolved
		updateAffectedsWithCPEs(vulns, resolved)
	}
	return err
}
// updateAffectedsWithCPEs writes database IDs onto every CPE referenced by the
// affected entries of vulns.
//
// uniqueCPEs supplies the ID for each CPE, keyed by its schema, type, vendor,
// product, version, update, and target software joined with ":". A CPE whose
// key is not present in uniqueCPEs gets ID 0 (the map's zero value).
// Vulnerabilities without affected entries and affected entries without CPEs
// are skipped.
func updateAffectedsWithCPEs(vulns []*Vulnerability, uniqueCPEs []*Cpe) {
	const keyFormat = "%s:%s:%s:%s:%s:%s:%s"

	idsByKey := make(map[string]int64)
	for _, known := range uniqueCPEs {
		key := fmt.Sprintf(keyFormat, known.Schema, known.Type, strVal(known.Vendor), known.Product, strVal(known.Version), strVal(known.Update), strVal(known.TargetSoftware))
		idsByKey[key] = known.ID
	}

	for _, vuln := range vulns {
		if vuln.Affected == nil {
			continue
		}
		for _, aff := range *vuln.Affected {
			if aff.Cpes == nil {
				continue
			}
			// aff is a copy, but aff.Cpes still points at the shared slice, so
			// writing through cpes updates the vulnerability's own data
			cpes := *aff.Cpes
			for k := range cpes {
				entry := &cpes[k]
				key := fmt.Sprintf(keyFormat, entry.Schema, entry.Type, strVal(entry.Vendor), entry.Product, strVal(entry.Version), strVal(entry.Update), strVal(entry.TargetSoftware))
				entry.ID = idsByKey[key]
			}
		}
	}
}
// ensureUniqueCPEs collects the distinct CPEs referenced by the affected
// entries of vulns and makes sure each one has a row in the database.
//
// CPEs are keyed by schema, type, vendor, product, version, update, and target
// software joined with ":"; when several entries share a key, the last one seen
// is kept. Each distinct CPE is looked up with FirstOrCreate and the stored
// record is returned. The order of the returned slice follows map iteration
// and is therefore not deterministic.
//
// The first database error aborts the operation and is returned with a nil slice.
func ensureUniqueCPEs(db *gorm.DB, vulns []*Vulnerability) ([]*Cpe, error) {
	// gather one representative value per distinct CPE key
	distinct := make(map[string]Cpe)
	for _, vuln := range vulns {
		if vuln.Affected == nil {
			continue
		}
		for _, aff := range *vuln.Affected {
			if aff.Cpes == nil {
				continue
			}
			for _, candidate := range *aff.Cpes {
				key := fmt.Sprintf("%s:%s:%s:%s:%s:%s:%s", candidate.Schema, candidate.Type, strVal(candidate.Vendor), candidate.Product, strVal(candidate.Version), strVal(candidate.Update), strVal(candidate.TargetSoftware))
				distinct[key] = candidate
			}
		}
	}

	// insert unique CPEs into the database or fetch existing ones
	var uniqueCPEs []*Cpe
	for key := range distinct {
		candidate := distinct[key]
		var existing Cpe
		err := db.Where("schema = ? AND type = ? AND vendor = ? AND product = ? AND version = ? AND version_update = ? AND target_software = ?", candidate.Schema, candidate.Type, candidate.Vendor, candidate.Product, candidate.Version, candidate.Update, candidate.TargetSoftware).
			FirstOrCreate(&existing, &candidate).Error
		if err != nil {
			return nil, err
		}
		// hand back the stored record rather than the in-memory candidate
		uniqueCPEs = append(uniqueCPEs, &existing)
	}

	return uniqueCPEs, nil
}
//func (s *vulnerabilityStore) handleCPEs(vulns []*Vulnerability) error {
// // ensure unique cpes
// unique, err := ensureUniqueCPEs(s.db, vulns)
// if err != nil {
// return err
// }
//
// // update vulnerabilities with cpes IDs
// updateAffectedsWithCPEs(vulns, unique)
//
// return nil
//}
//
//func updateAffectedsWithCPEs(vulns []*Vulnerability, uniqueCPEs []*Cpe) {
// cpeMap := make(map[string]int64)
// for _, c := range uniqueCPEs {
// cpeKey := fmt.Sprintf("%s:%s:%s:%s:%s:%s:%s", c.Schema, c.Type, strVal(c.Vendor), c.Product, strVal(c.Version), strVal(c.Update), strVal(c.TargetSoftware))
// cpeMap[cpeKey] = c.ID
// }
//
// for i, v := range vulns {
// if v.Affected == nil {
// continue
// }
// for j, a := range *v.Affected {
// if a.Cpes == nil {
// continue
// }
// for k, c := range *a.Cpes {
// cpeKey := fmt.Sprintf("%s:%s:%s:%s:%s:%s:%s", c.Schema, c.Type, strVal(c.Vendor), c.Product, strVal(c.Version), strVal(c.Update), strVal(c.TargetSoftware))
// val := cpeMap[cpeKey]
// (*(*vulns[i].Affected)[j].Cpes)[k].ID = val
// }
// }
// }
//}
//
//func ensureUniqueCPEs(db *gorm.DB, vulns []*Vulnerability) ([]*Cpe, error) {
// cpeMap := make(map[string]Cpe)
// for _, v := range vulns {
// if v.Affected == nil {
// continue
// }
// for _, a := range *v.Affected {
// if a.Cpes == nil {
// continue
// }
// for _, c := range *a.Cpes {
// cpeKey := fmt.Sprintf("%s:%s:%s:%s:%s:%s:%s", c.Schema, c.Type, strVal(c.Vendor), c.Product, strVal(c.Version), strVal(c.Update), strVal(c.TargetSoftware))
// cpeMap[cpeKey] = c
// }
// }
// }
//
// // extract unique CPEs
// var uniqueCPEs []*Cpe
// for i := range cpeMap {
// c := cpeMap[i]
// uniqueCPEs = append(uniqueCPEs, &c)
// }
//
// // insert unique CPEs into the database or fetch existing ones
// for i, c := range uniqueCPEs {
// var existing Cpe
// err := db.Where("schema = ? AND type = ? AND vendor = ? AND product = ? AND version = ? AND version_update = ? AND target_software = ?", c.Schema, c.Type, c.Vendor, c.Product, c.Version, c.Update, c.TargetSoftware).
// FirstOrCreate(&existing, c).Error
// if err != nil {
// return nil, err
// }
// uniqueCPEs[i] = &existing
// }
//
// return uniqueCPEs, nil
//}
func (s *vulnerabilityStore) handleRangeEvents(vulns []*Vulnerability) error {
// ensure unique operating systems