Introduce indexed embedded CPE dictionary (#1897)

* Introduce indexed embedded CPE dictionary

Signed-off-by: Dan Luhring <dluhring@chainguard.dev>

* Don't generate cpe-index on make snapshot

Signed-off-by: Dan Luhring <dluhring@chainguard.dev>

* Add unit tests for individual addEntry funcs

Signed-off-by: Dan Luhring <dluhring@chainguard.dev>

* migrate CPE index build to go generate and add periodic workflow

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* add test to ensure generated cpe index is wired up to function that uses it

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

---------

Signed-off-by: Dan Luhring <dluhring@chainguard.dev>
Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
Co-authored-by: Alex Goodman <wagoodman@users.noreply.github.com>
This commit is contained in:
Dan Luhring 2023-07-21 09:54:19 -04:00 committed by GitHub
parent 3f5c601620
commit 99d172f0d1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
16 changed files with 26855 additions and 4 deletions

View file

@ -6,7 +6,7 @@ on:
workflow_dispatch: workflow_dispatch:
env: env:
GO_VERSION: "1.19.x" GO_VERSION: "1.20.x"
GO_STABLE_VERSION: true GO_STABLE_VERSION: true
jobs: jobs:

View file

@ -0,0 +1,43 @@
name: PR to update CPE dictionary index
on:
schedule:
- cron: "0 1 * * 1" # every monday at 1 AM
workflow_dispatch:
env:
GO_VERSION: "1.20.x"
GO_STABLE_VERSION: true
jobs:
upgrade-cpe-dictionary-index:
runs-on: ubuntu-latest
if: github.repository == 'anchore/syft' # only run for main repo
steps:
- uses: actions/checkout@v3
- uses: actions/setup-go@v4
with:
go-version: ${{ env.GO_VERSION }}
stable: ${{ env.GO_STABLE_VERSION }}
- run: |
make generate-cpe-dictionary-index
- uses: tibdex/github-app-token@v1
id: generate-token
with:
app_id: ${{ secrets.TOKEN_APP_ID }}
private_key: ${{ secrets.TOKEN_APP_PRIVATE_KEY }}
- uses: peter-evans/create-pull-request@v5
with:
signoff: true
delete-branch: true
branch: auto/latest-cpe-dictionary-index
labels: dependencies
commit-message: "chore(deps): update CPE dictionary index"
title: "chore(deps): update CPE dictionary index"
body: |
Update CPE dictionary index based on the latest available CPE dictionary
token: ${{ steps.generate-token.outputs.token }}

View file

@ -6,7 +6,7 @@ on:
workflow_dispatch: workflow_dispatch:
env: env:
GO_VERSION: "1.19.x" GO_VERSION: "1.20.x"
GO_STABLE_VERSION: true GO_STABLE_VERSION: true
jobs: jobs:

View file

@ -298,7 +298,7 @@ compare-test-rpm-package-install: $(TEMP_DIR) $(SNAPSHOT_DIR)
$(TEMP_DIR) $(TEMP_DIR)
## Code generation targets ################################# ## Code and data generation targets #################################
.PHONY: generate-json-schema .PHONY: generate-json-schema
generate-json-schema: ## Generate a new json schema generate-json-schema: ## Generate a new json schema
@ -309,6 +309,11 @@ generate-license-list: ## Generate an updated spdx license list
go generate ./internal/spdxlicense/... go generate ./internal/spdxlicense/...
gofmt -s -w ./internal/spdxlicense gofmt -s -w ./internal/spdxlicense
.PHONY: generate-cpe-dictionary-index
generate-cpe-dictionary-index: ## Build the CPE index based off of the latest available CPE dictionary
$(call title,Building CPE index)
go generate ./syft/pkg/cataloger/common/cpe/dictionary
## Build-related targets ################################# ## Build-related targets #################################

View file

@ -76,7 +76,14 @@ func runCataloger(cataloger pkg.Cataloger, resolver file.Resolver) (catalogerRes
for _, p := range packages { for _, p := range packages {
// generate CPEs (note: this is excluded from package ID, so is safe to mutate) // generate CPEs (note: this is excluded from package ID, so is safe to mutate)
// we might have binary classified CPE already with the package so we want to append here // we might have binary classified CPE already with the package so we want to append here
p.CPEs = append(p.CPEs, cpe.Generate(p)...)
dictionaryCPE, ok := cpe.DictionaryFind(p)
if ok {
log.Debugf("used CPE dictionary to find CPE for %s package %q: %s", p.Type, p.Name, dictionaryCPE.BindToFmtString())
p.CPEs = append(p.CPEs, dictionaryCPE)
} else {
p.CPEs = append(p.CPEs, cpe.Generate(p)...)
}
// if we were not able to identify the language we have an opportunity // if we were not able to identify the language we have an opportunity
// to try and get this value from the PURL. Worst case we assert that // to try and get this value from the PURL. Worst case we assert that

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,3 @@
package dictionary
//go:generate go run ./index-generator/ -o data/cpe-index.json

View file

@ -0,0 +1,230 @@
package main
import (
"compress/gzip"
"encoding/json"
"encoding/xml"
"fmt"
"io"
"log"
"strings"
"github.com/facebookincubator/nvdtools/wfn"
"golang.org/x/exp/slices"
"github.com/anchore/syft/syft/pkg/cataloger/common/cpe/dictionary"
)
// generateIndexedDictionaryJSON turns a gzip-compressed CPE dictionary (XML)
// into the JSON encoding of the indexed dictionary.
//
// The stream is decompressed and read fully into memory, unmarshalled into a
// CpeList, passed through filterCpeList and indexCPEList, and finally encoded
// as indented JSON. Any failure is returned wrapped with the step that failed.
func generateIndexedDictionaryJSON(rawGzipData io.Reader) ([]byte, error) {
	decompressed, err := gzip.NewReader(rawGzipData)
	if err != nil {
		return nil, fmt.Errorf("unable to decompress CPE dictionary: %w", err)
	}
	defer decompressed.Close()

	// pull the whole XML document into memory before decoding it
	xmlBytes, err := io.ReadAll(decompressed)
	if err != nil {
		return nil, fmt.Errorf("unable to read CPE dictionary: %w", err)
	}

	var parsed CpeList
	if err = xml.Unmarshal(xmlBytes, &parsed); err != nil {
		return nil, fmt.Errorf("unable to unmarshal CPE dictionary XML: %w", err)
	}

	// drop the items that are not applicable, then index what is left
	index := indexCPEList(filterCpeList(parsed))

	encoded, err := json.MarshalIndent(index, "", " ")
	if err != nil {
		return nil, fmt.Errorf("unable to marshal CPE dictionary to JSON: %w", err)
	}

	return encoded, nil
}
// filterCpeList removes CPE items that are not applicable to software packages.
// filterCpeList removes CPE items that are not applicable to software packages.
//
// An item is dropped when any of the following holds:
//   - it has no references;
//   - its CPE URI or its CPE 2.3 name cannot be parsed (the failure is logged);
//   - its CPE URI has part "h" or "o";
//   - an earlier kept item has the same normalized CPE URI.
//
// Kept items are returned in their original order with both names replaced by
// their normalized form (see normalizeCPE): the CPE URI is re-bound as a URI and
// the CPE 2.3 name as a formatted string.
func filterCpeList(cpeList CpeList) CpeList {
	var processedCpeList CpeList

	seen := make(map[string]struct{})

	for _, cpeItem := range cpeList.CpeItems {
		// Skip CPE items that don't have any references.
		if len(cpeItem.References) == 0 {
			continue
		}

		// Skip CPE items where the CPE URI doesn't meet our criteria.
		parsedName, err := wfn.Parse(cpeItem.Name)
		if err != nil {
			// wfn.Parse yields no usable attributes on failure, so the item
			// must be skipped rather than dereferenced below.
			log.Printf("unable to parse CPE URI %q: %s", cpeItem.Name, err)
			continue
		}

		if slices.Contains([]string{"h", "o"}, parsedName.Part) {
			continue
		}

		normalizedName := normalizeCPE(parsedName).BindToURI()
		if _, ok := seen[normalizedName]; ok {
			continue
		}

		parsedCPE, err := wfn.Parse(cpeItem.Cpe23Item.Name)
		if err != nil {
			log.Printf("unable to parse CPE value %q: %s", cpeItem.Cpe23Item.Name, err)
			continue
		}

		// Only mark the name as seen once the item is known to be usable, so
		// a later item with the same name and a valid CPE 2.3 value is not
		// shadowed by a broken one.
		seen[normalizedName] = struct{}{}

		cpeItem.Name = normalizedName
		cpeItem.Cpe23Item.Name = normalizeCPE(parsedCPE).BindToFmtString()

		processedCpeList.CpeItems = append(processedCpeList.CpeItems, cpeItem)
	}

	return processedCpeList
}
// normalizeCPE removes the version and update parts of a CPE.
// normalizeCPE returns a copy of the given CPE whose version and update
// attributes are blanked out. The attributes passed in are left untouched.
func normalizeCPE(cpe *wfn.Attributes) *wfn.Attributes {
	normalized := new(wfn.Attributes)
	*normalized = *cpe

	normalized.Version, normalized.Update = "", ""

	return normalized
}
// URL prefixes that indexCPEList matches against CPE reference links in order
// to decide which ecosystem a CPE item is filed under.
const (
// npm package pages
prefixForNPMPackages = "https://www.npmjs.com/package/"
// RubyGems gem pages; both the https and the plain http form are matched
prefixForRubyGems = "https://rubygems.org/gems/"
prefixForRubyGemsHTTP = "http://rubygems.org/gems/"
// repositories under the "ruby" GitHub organization; entries found this way
// are stored in the same ecosystem as regular RubyGems
prefixForNativeRubyGems = "https://github.com/ruby/"
// PyPI project pages
prefixForPyPIPackages = "https://pypi.org/project/"
// repositories under the "jenkinsci" GitHub organization; only repositories
// whose name ends in "-plugin" are indexed as Jenkins plugins
prefixForJenkinsPlugins = "https://github.com/jenkinsci/"
// crates.io crate pages
prefixForRustCrates = "https://crates.io/crates/"
)
// indexCPEList creates an index of CPEs by ecosystem.
// indexCPEList builds the per-ecosystem CPE index from the given CPE list.
//
// Every reference URL of every CPE item is compared against the known URL
// prefixes; the first group with a matching prefix decides which addEntryFor*
// function records the item's CPE 2.3 name. References that match no prefix
// are ignored.
func indexCPEList(list CpeList) *dictionary.Indexed {
	// routes are evaluated top to bottom; the first match wins
	routes := []struct {
		prefixes []string
		add      func(indexed *dictionary.Indexed, ref string, cpeItemName string)
	}{
		{[]string{prefixForNPMPackages}, addEntryForNPMPackage},
		{[]string{prefixForRubyGems, prefixForRubyGemsHTTP}, addEntryForRubyGem},
		{[]string{prefixForNativeRubyGems}, addEntryForNativeRubyGem},
		{[]string{prefixForPyPIPackages}, addEntryForPyPIPackage},
		// It _might_ be a jenkins plugin!
		{[]string{prefixForJenkinsPlugins}, addEntryForJenkinsPlugin},
		{[]string{prefixForRustCrates}, addEntryForRustCrate},
	}

	result := &dictionary.Indexed{
		EcosystemPackages: make(map[string]dictionary.Packages),
	}

	for _, item := range list.CpeItems {
		cpe23Name := item.Cpe23Item.Name

	nextReference:
		for _, r := range item.References {
			href := r.Reference.Href

			for _, route := range routes {
				for _, prefix := range route.prefixes {
					if strings.HasPrefix(href, prefix) {
						route.add(result, href, cpe23Name)
						continue nextReference
					}
				}
			}
		}
	}

	return result
}
// addEntryForRustCrate records cpeItemName under the Rust crates ecosystem.
// The crate name is the first path segment of ref after the crates.io prefix.
func addEntryForRustCrate(indexed *dictionary.Indexed, ref string, cpeItemName string) {
	// keep only the package name from the URL
	crate, _, _ := strings.Cut(strings.TrimPrefix(ref, prefixForRustCrates), "/")

	crates, exists := indexed.EcosystemPackages[dictionary.EcosystemRustCrates]
	if !exists {
		crates = make(dictionary.Packages)
		indexed.EcosystemPackages[dictionary.EcosystemRustCrates] = crates
	}

	crates[crate] = cpeItemName
}
// addEntryForJenkinsPlugin records cpeItemName under the Jenkins plugins
// ecosystem, provided ref points at a repository whose name ends in "-plugin".
// The plugin name is the repository name with that suffix removed. References
// to any other repository leave the index untouched.
func addEntryForJenkinsPlugin(indexed *dictionary.Indexed, ref string, cpeItemName string) {
	const pluginSuffix = "-plugin"

	// keep only the repository name from the URL
	repo, _, _ := strings.Cut(strings.TrimPrefix(ref, prefixForJenkinsPlugins), "/")

	if !strings.HasSuffix(repo, pluginSuffix) {
		// It's not a jenkins plugin!
		return
	}

	plugin := repo[:len(repo)-len(pluginSuffix)]

	plugins, exists := indexed.EcosystemPackages[dictionary.EcosystemJenkinsPlugins]
	if !exists {
		plugins = make(dictionary.Packages)
		indexed.EcosystemPackages[dictionary.EcosystemJenkinsPlugins] = plugins
	}

	plugins[plugin] = cpeItemName
}
// addEntryForPyPIPackage records cpeItemName under the PyPI ecosystem.
// The project name is the first path segment of ref after the PyPI prefix.
func addEntryForPyPIPackage(indexed *dictionary.Indexed, ref string, cpeItemName string) {
	// keep only the package name from the URL
	project, _, _ := strings.Cut(strings.TrimPrefix(ref, prefixForPyPIPackages), "/")

	projects, exists := indexed.EcosystemPackages[dictionary.EcosystemPyPI]
	if !exists {
		projects = make(dictionary.Packages)
		indexed.EcosystemPackages[dictionary.EcosystemPyPI] = projects
	}

	projects[project] = cpeItemName
}
// addEntryForNativeRubyGem records cpeItemName under the RubyGems ecosystem
// for a reference that points into the "ruby" GitHub organization. The gem
// name is the first path segment of ref after that prefix.
func addEntryForNativeRubyGem(indexed *dictionary.Indexed, ref string, cpeItemName string) {
	// keep only the package name from the URL
	gem, _, _ := strings.Cut(strings.TrimPrefix(ref, prefixForNativeRubyGems), "/")

	gems, exists := indexed.EcosystemPackages[dictionary.EcosystemRubyGems]
	if !exists {
		gems = make(dictionary.Packages)
		indexed.EcosystemPackages[dictionary.EcosystemRubyGems] = gems
	}

	gems[gem] = cpeItemName
}
// addEntryForRubyGem records cpeItemName under the RubyGems ecosystem.
// Both the https and the http rubygems.org prefixes are stripped from ref, and
// the gem name is the first path segment of what remains.
func addEntryForRubyGem(indexed *dictionary.Indexed, ref string, cpeItemName string) {
	// keep only the package name from the URL
	remainder := strings.TrimPrefix(ref, prefixForRubyGems)
	remainder = strings.TrimPrefix(remainder, prefixForRubyGemsHTTP)
	gem, _, _ := strings.Cut(remainder, "/")

	gems, exists := indexed.EcosystemPackages[dictionary.EcosystemRubyGems]
	if !exists {
		gems = make(dictionary.Packages)
		indexed.EcosystemPackages[dictionary.EcosystemRubyGems] = gems
	}

	gems[gem] = cpeItemName
}
// addEntryForNPMPackage records cpeItemName under the npm ecosystem.
// Anything from the first "/v/" onwards and anything from the first "?"
// onwards is cut from ref before the npm prefix is stripped, so the remaining
// package name may still contain a slash (as in "@scope/name").
func addEntryForNPMPackage(indexed *dictionary.Indexed, ref string, cpeItemName string) {
	// keep only the package name from the URL
	trimmed, _, _ := strings.Cut(ref, "/v/")
	trimmed, _, _ = strings.Cut(trimmed, "?")
	name := strings.TrimPrefix(trimmed, prefixForNPMPackages)

	packages, exists := indexed.EcosystemPackages[dictionary.EcosystemNPM]
	if !exists {
		packages = make(dictionary.Packages)
		indexed.EcosystemPackages[dictionary.EcosystemNPM] = packages
	}

	packages[name] = cpeItemName
}

View file

@ -0,0 +1,169 @@
package main
import (
"bytes"
"compress/gzip"
"io"
"os"
"testing"
"github.com/google/go-cmp/cmp"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/anchore/syft/syft/pkg/cataloger/common/cpe/dictionary"
)
// Test_generateIndexedDictionaryJSON gzips the XML fixture in memory, runs it
// through generateIndexedDictionaryJSON and compares the produced JSON with
// the expected index stored next to the fixture.
func Test_generateIndexedDictionaryJSON(t *testing.T) {
	f, err := os.Open("testdata/official-cpe-dictionary_v2.3.xml")
	require.NoError(t, err)
	// release the fixture's file handle once the test is done
	defer f.Close()

	// Create a buffer to store the gzipped data in memory
	buf := new(bytes.Buffer)

	w := gzip.NewWriter(buf)
	_, err = io.Copy(w, f)
	require.NoError(t, err)

	// (finalize the gzip stream)
	err = w.Close()
	require.NoError(t, err)

	dictionaryJSON, err := generateIndexedDictionaryJSON(buf)
	assert.NoError(t, err)

	expected, err := os.ReadFile("./testdata/expected-cpe-index.json")
	require.NoError(t, err)

	expectedDictionaryJSONString := string(expected)
	dictionaryJSONString := string(dictionaryJSON)

	if diff := cmp.Diff(expectedDictionaryJSONString, dictionaryJSONString); diff != "" {
		t.Errorf("generateIndexedDictionaryJSON() mismatch (-want +got):\n%s", diff)
	}
}
// Test_addEntryFuncs runs each addEntryFor* function against a fresh, empty
// index with one reference URL and checks the resulting index contents.
func Test_addEntryFuncs(t *testing.T) {
	// singleEntry is the index expected after exactly one successful insertion.
	singleEntry := func(ecosystem, name, cpeItemName string) dictionary.Indexed {
		return dictionary.Indexed{
			EcosystemPackages: map[string]dictionary.Packages{
				ecosystem: {
					name: cpeItemName,
				},
			},
		}
	}

	const (
		rustCPE       = "cpe:2.3:a:unicycle_project:unicycle:*:*:*:*:*:rust:*:*"
		jenkinsCPE    = "cpe:2.3:a:sonarsource:sonarqube_scanner:2.7:*:*:*:*:jenkins:*:*"
		pypiCPE       = "cpe:2.3:a:vault-cli_project:vault-cli:*:*:*:*:*:python:*:*"
		nativeRubyCPE = "cpe:2.3:a:ruby-lang:openssl:-:*:*:*:*:ruby:*:*"
		rubyHTTPSCPE  = "cpe:2.3:a:action_view_project:action_view:*:*:*:*:*:ruby:*:*"
		rubyHTTPCPE   = "cpe:2.3:a:amos_benari:rbovirt:*:*:*:*:*:ruby:*:*"
		npmCPE        = "cpe:2.3:a:\\@nubosoftware\\/node-static_project:\\@nubosoftware\\/node-static:-:*:*:*:*:node.js:*:*"
	)

	cases := []struct {
		name string
		add  func(indexed *dictionary.Indexed, ref string, cpeItemName string)
		ref  string
		cpe  string
		want dictionary.Indexed
	}{
		{
			name: "addEntryForRustCrate",
			add:  addEntryForRustCrate,
			ref:  "https://crates.io/crates/unicycle/versions",
			cpe:  rustCPE,
			want: singleEntry(dictionary.EcosystemRustCrates, "unicycle", rustCPE),
		},
		{
			name: "addEntryForJenkinsPlugin",
			add:  addEntryForJenkinsPlugin,
			ref:  "https://github.com/jenkinsci/sonarqube-plugin",
			cpe:  jenkinsCPE,
			want: singleEntry(dictionary.EcosystemJenkinsPlugins, "sonarqube", jenkinsCPE),
		},
		{
			name: "addEntryForJenkinsPlugin: not actually a plugin",
			add:  addEntryForJenkinsPlugin,
			ref:  "https://github.com/jenkinsci/jenkins",
			cpe:  "cpe:2.3:a:jenkins:jenkinsci:2.7:*:*:*:*:*:*:*",
			// nothing is added for a repository that is not a plugin
			want: dictionary.Indexed{
				EcosystemPackages: map[string]dictionary.Packages{},
			},
		},
		{
			name: "addEntryForPyPIPackage",
			add:  addEntryForPyPIPackage,
			ref:  "https://pypi.org/project/vault-cli/#history",
			cpe:  pypiCPE,
			want: singleEntry(dictionary.EcosystemPyPI, "vault-cli", pypiCPE),
		},
		{
			name: "addEntryForNativeRubyGem",
			add:  addEntryForNativeRubyGem,
			ref:  "https://github.com/ruby/openssl/releases",
			cpe:  nativeRubyCPE,
			want: singleEntry(dictionary.EcosystemRubyGems, "openssl", nativeRubyCPE),
		},
		{
			name: "addEntryForRubyGem: https",
			add:  addEntryForRubyGem,
			ref:  "https://rubygems.org/gems/actionview/versions",
			cpe:  rubyHTTPSCPE,
			want: singleEntry(dictionary.EcosystemRubyGems, "actionview", rubyHTTPSCPE),
		},
		{
			name: "addEntryForRubyGem: http",
			add:  addEntryForRubyGem,
			ref:  "http://rubygems.org/gems/rbovirt",
			cpe:  rubyHTTPCPE,
			want: singleEntry(dictionary.EcosystemRubyGems, "rbovirt", rubyHTTPCPE),
		},
		{
			name: "addEntryForNPMPackage",
			add:  addEntryForNPMPackage,
			ref:  "https://www.npmjs.com/package/@nubosoftware/node-static",
			cpe:  npmCPE,
			want: singleEntry(dictionary.EcosystemNPM, "@nubosoftware/node-static", npmCPE),
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := &dictionary.Indexed{
				EcosystemPackages: make(map[string]dictionary.Packages),
			}

			tc.add(got, tc.ref, tc.cpe)

			if diff := cmp.Diff(tc.want, *got); diff != "" {
				t.Errorf("addEntry* mismatch (-want +got):\n%s", diff)
			}
		})
	}
}

View file

@ -0,0 +1,57 @@
// This program downloads the latest CPE dictionary from NIST and processes it into a JSON file that can be embedded into Syft for more accurate CPE results.
package main
import (
"errors"
"flag"
"fmt"
"log"
"net/http"
"os"
)
// mainE implements the command: it parses the -o flag, downloads the CPE
// dictionary from cpeDictionaryURL, converts it with
// generateIndexedDictionaryJSON and writes the resulting JSON to the requested
// file with mode 0600.
//
// It returns an error when -o is missing, when the download fails or does not
// answer with HTTP 200, when the index cannot be generated, or when the output
// file cannot be written.
func mainE() error {
	var outputFilename string
	flag.StringVar(&outputFilename, "o", "", "file location to save CPE index")
	flag.Parse()

	if outputFilename == "" {
		return errors.New("-o is required")
	}

	// Download and decompress file
	fmt.Println("Fetching CPE dictionary...")
	resp, err := http.Get(cpeDictionaryURL)
	if err != nil {
		return fmt.Errorf("unable to get CPE dictionary: %w", err)
	}
	defer resp.Body.Close()

	// http.Get only reports transport-level failures; without this check an
	// error page would be handed to the gzip reader and surface as a
	// confusing decompression error.
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("unable to get CPE dictionary: unexpected HTTP status %q", resp.Status)
	}

	fmt.Println("Generating index...")
	dictionaryJSON, err := generateIndexedDictionaryJSON(resp.Body)
	if err != nil {
		return err
	}

	// Write CPE index (JSON data) to disk
	err = os.WriteFile(outputFilename, dictionaryJSON, 0600)
	if err != nil {
		return fmt.Errorf("unable to write processed CPE dictionary to file: %w", err)
	}

	fmt.Println("Done!")

	return nil
}
// errExit prints an error and exits with a non-zero exit code.
// errExit logs the given error and terminates the process with exit code 1.
func errExit(err error) {
	// Fatalf is Printf followed by os.Exit(1)
	log.Fatalf("command failed: %s", err)
}
// main runs mainE and, if it fails, reports the error through errExit.
func main() {
	err := mainE()
	if err == nil {
		return
	}

	errExit(err)
}

View file

@ -0,0 +1,21 @@
package main
// CpeItem is the XML decoding target for one "cpe-item" element of the CPE
// dictionary. Only the parts used by the index generator are mapped.
type CpeItem struct {
// Name is the value of the item's "name" attribute.
Name string `xml:"name,attr"`
// Title is the content of the item's "title" child element.
Title string `xml:"title"`
// References holds one entry per "references" child element.
// NOTE(review): Reference is a single struct rather than a slice, so a
// "references" element with several "reference" children presumably keeps
// only one of them — confirm against encoding/xml behaviour.
References []struct {
Reference struct {
// Href is the value of the reference's "href" attribute.
Href string `xml:"href,attr"`
// Body is the character data inside the reference element.
Body string `xml:",chardata"`
} `xml:"reference"`
} `xml:"references"`
// Cpe23Item maps the "cpe23-item" child element; only its "name"
// attribute is read.
Cpe23Item struct {
Name string `xml:"name,attr"`
} `xml:"cpe23-item"`
}
// CpeList is the XML decoding target for the whole CPE dictionary document: it
// collects every "cpe-item" child element of the root.
type CpeList struct {
CpeItems []CpeItem `xml:"cpe-item"`
}
// cpeDictionaryURL is the location of the gzip-compressed CPE dictionary
// (v2.3, XML) on nvd.nist.gov.
const cpeDictionaryURL = "https://nvd.nist.gov/feeds/xml/cpe/dictionary/official-cpe-dictionary_v2.3.xml.gz"

View file

@ -0,0 +1,23 @@
{
"ecosystems": {
"jenkins_plugins": {
"sonarqube": "cpe:2.3:a:sonarsource:sonarqube_scanner:*:*:*:*:*:jenkins:*:*"
},
"npm": {
"merge-recursive": "cpe:2.3:a:umbraengineering:merge-recursive:*:*:*:*:*:node.js:*:*",
"static-dev-server": "cpe:2.3:a:static-dev-server_project:static-dev-server:*:*:*:*:*:node.js:*:*",
"umount": "cpe:2.3:a:umount_project:umount:*:*:*:*:*:node.js:*:*",
"undefsafe": "cpe:2.3:a:undefsafe_project:undefsafe:*:*:*:*:*:node.js:*:*",
"underscore": "cpe:2.3:a:underscorejs:underscore:*:*:*:*:*:node.js:*:*",
"underscore-99xp": "cpe:2.3:a:underscore-99xp_project:underscore-99xp:*:*:*:*:*:node.js:*:*",
"unicode": "cpe:2.3:a:unicode_project:unicode:*:*:*:*:*:node.js:*:*",
"unicorn-list": "cpe:2.3:a:unicorn-list_project:unicorn-list:*:*:*:*:*:node.js:*:*"
},
"rubygems": {
"openssl": "cpe:2.3:a:ruby-lang:openssl:*:*:*:*:*:*:*:*"
},
"rust_crates": {
"unicycle": "cpe:2.3:a:unicycle_project:unicycle:*:*:*:*:*:rust:*:*"
}
}
}

View file

@ -0,0 +1,15 @@
package dictionary
// Ecosystem names, used as the keys of Indexed.EcosystemPackages.
const (
EcosystemNPM = "npm"
EcosystemRubyGems = "rubygems"
EcosystemPyPI = "pypi"
EcosystemJenkinsPlugins = "jenkins_plugins"
EcosystemRustCrates = "rust_crates"
)
// Indexed is the CPE dictionary grouped by ecosystem. In JSON the groups are
// stored under the "ecosystems" key.
type Indexed struct {
// EcosystemPackages maps an ecosystem name to that ecosystem's packages.
EcosystemPackages map[string]Packages `json:"ecosystems"`
}
// Packages maps a package name to a CPE string.
type Packages map[string]string

View file

@ -3,16 +3,21 @@ package cpe
import ( import (
"bufio" "bufio"
"bytes" "bytes"
_ "embed"
"encoding/json"
"fmt" "fmt"
"sort" "sort"
"strings" "strings"
"sync"
"github.com/facebookincubator/nvdtools/wfn" "github.com/facebookincubator/nvdtools/wfn"
"github.com/scylladb/go-set/strset" "github.com/scylladb/go-set/strset"
"github.com/anchore/syft/internal" "github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/cpe" "github.com/anchore/syft/syft/cpe"
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/common/cpe/dictionary"
) )
// knownVendors contains vendor strings that are known to exist in // knownVendors contains vendor strings that are known to exist in
@ -32,6 +37,77 @@ func newCPE(product, vendor, version, targetSW string) *wfn.Attributes {
return &c return &c
} }
// indexedCPEDictionaryData is the raw content of the generated CPE index file,
// embedded into the binary at build time.
//
//go:embed dictionary/data/cpe-index.json
var indexedCPEDictionaryData []byte
// indexedCPEDictionary is the decoded form of indexedCPEDictionaryData; it is
// filled in by GetIndexedDictionary.
var indexedCPEDictionary *dictionary.Indexed
// indexedCPEDictionaryOnce ensures the embedded data is decoded at most once.
var indexedCPEDictionaryOnce sync.Once
// indexedCPEDictionaryErr keeps the outcome of the one-time decode of the
// embedded CPE index so that every caller, not just the first, sees a failure.
var indexedCPEDictionaryErr error

// GetIndexedDictionary returns the CPE dictionary index decoded from the
// embedded JSON data.
//
// The data is decoded on the first call only; later calls return the cached
// dictionary, or the same error if decoding failed. The dictionary is published
// only when decoding succeeded completely, so callers never receive a partially
// decoded index. A nil dictionary is always accompanied by a non-nil error.
func GetIndexedDictionary() (_ *dictionary.Indexed, err error) {
	indexedCPEDictionaryOnce.Do(func() {
		var decoded *dictionary.Indexed
		if decodeErr := json.Unmarshal(indexedCPEDictionaryData, &decoded); decodeErr != nil {
			indexedCPEDictionaryErr = fmt.Errorf("failed to unmarshal indexed CPE dictionary: %w", decodeErr)
			return
		}
		indexedCPEDictionary = decoded
	})

	if indexedCPEDictionaryErr != nil {
		return nil, indexedCPEDictionaryErr
	}

	// covers embedded data that decodes to nothing (e.g. a JSON null)
	if indexedCPEDictionary == nil {
		return nil, fmt.Errorf("failed to unmarshal indexed CPE dictionary")
	}

	return indexedCPEDictionary, nil
}
// DictionaryFind looks the package up in the embedded CPE dictionary index.
//
// The package type selects the ecosystem (npm, RubyGems, PyPI, Jenkins plugins
// or Rust crates) and the package name is the lookup key within it. On a hit
// the dictionary CPE is returned with its version set to the package version
// and the boolean is true.
//
// The boolean is false, and the returned CPE is the zero value, when the
// dictionary is unavailable, the package type is not covered, the name has no
// entry, or the stored CPE string cannot be parsed. Unavailability and parse
// failures are logged at debug level.
func DictionaryFind(p pkg.Package) (cpe.CPE, bool) {
	dict, err := GetIndexedDictionary()
	if err != nil {
		log.Debugf("dictionary CPE lookup not available: %+v", err)
		return cpe.CPE{}, false
	}

	var ecosystem string

	switch p.Type {
	case pkg.NpmPkg:
		ecosystem = dictionary.EcosystemNPM

	case pkg.GemPkg:
		ecosystem = dictionary.EcosystemRubyGems

	case pkg.PythonPkg:
		ecosystem = dictionary.EcosystemPyPI

	case pkg.JenkinsPluginPkg:
		ecosystem = dictionary.EcosystemJenkinsPlugins

	case pkg.RustPkg:
		ecosystem = dictionary.EcosystemRustCrates

	default:
		// The dictionary doesn't support this package type yet.
		return cpe.CPE{}, false
	}

	// indexing a missing ecosystem yields a nil map, which is safe to read
	cpeString, ok := dict.EcosystemPackages[ecosystem][p.Name]
	if !ok {
		// The dictionary doesn't have a CPE for this package.
		return cpe.CPE{}, false
	}

	parsedCPE, err := cpe.New(cpeString)
	if err != nil {
		// a bad entry in the embedded index should be visible, not silently skipped
		log.Debugf("unable to parse dictionary CPE %q for %s package %q: %+v", cpeString, p.Type, p.Name, err)
		return cpe.CPE{}, false
	}

	parsedCPE.Version = p.Version

	return parsedCPE, true
}
// Generate Create a list of CPEs for a given package, trying to guess the vendor, product tuple. We should be trying to // Generate Create a list of CPEs for a given package, trying to guess the vendor, product tuple. We should be trying to
// generate the minimal set of representative CPEs, which implies that optional fields should not be included // generate the minimal set of representative CPEs, which implies that optional fields should not be included
// (such as target SW). // (such as target SW).

View file

@ -969,3 +969,33 @@ func Test_addSeparatorVariations(t *testing.T) {
}) })
} }
} }
// TestDictionaryFindIsWired checks that DictionaryFind is backed by the
// embedded CPE index by looking up a package that is expected to be in it.
func TestDictionaryFindIsWired(t *testing.T) {
	cases := []struct {
		name      string
		input     pkg.Package
		wantCPE   string
		wantFound bool
	}{
		{
			name: "sanity check that cpe data is wired up",
			input: pkg.Package{
				Name:    "openssl",
				Version: "1.0.2k",
				Type:    pkg.GemPkg,
			},
			wantCPE: "cpe:2.3:a:ruby-lang:openssl:1.0.2k:*:*:*:*:*:*:*",
			// without the cpe data wired up, this would be empty (generation also creates cpe:2.3:a:openssl:openssl:1.0.2k:*:*:*:*:*:*:*)
			wantFound: true,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			gotCPE, gotFound := DictionaryFind(tc.input)

			assert.Equal(t, tc.wantCPE, gotCPE.BindToFmtString())
			assert.Equal(t, tc.wantFound, gotFound)
		})
	}
}