mirror of
https://github.com/anchore/syft
synced 2024-11-10 06:14:16 +00:00
Introduce indexed embedded CPE dictionary (#1897)
* Introduce indexed embedded CPE dictionary Signed-off-by: Dan Luhring <dluhring@chainguard.dev> * Don't generate cpe-index on make snapshot Signed-off-by: Dan Luhring <dluhring@chainguard.dev> * Add unit tests for individual addEntry funcs Signed-off-by: Dan Luhring <dluhring@chainguard.dev> * migrate CPE index build to go generate and add periodic workflow Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com> * add test to ensure generated cpe index is wired up to function that uses it Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com> --------- Signed-off-by: Dan Luhring <dluhring@chainguard.dev> Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com> Co-authored-by: Alex Goodman <wagoodman@users.noreply.github.com>
This commit is contained in:
parent
3f5c601620
commit
99d172f0d1
16 changed files with 26855 additions and 4 deletions
2
.github/workflows/update-bootstrap-tools.yml
vendored
2
.github/workflows/update-bootstrap-tools.yml
vendored
|
@ -6,7 +6,7 @@ on:
|
|||
workflow_dispatch:
|
||||
|
||||
env:
|
||||
GO_VERSION: "1.19.x"
|
||||
GO_VERSION: "1.20.x"
|
||||
GO_STABLE_VERSION: true
|
||||
|
||||
jobs:
|
||||
|
|
43
.github/workflows/update-cpe-dictionary-index.yml
vendored
Normal file
43
.github/workflows/update-cpe-dictionary-index.yml
vendored
Normal file
|
@ -0,0 +1,43 @@
|
|||
name: PR to update CPE dictionary index
|
||||
on:
|
||||
schedule:
|
||||
- cron: "0 1 * * 1" # every monday at 1 AM
|
||||
|
||||
workflow_dispatch:
|
||||
|
||||
env:
|
||||
GO_VERSION: "1.20.x"
|
||||
GO_STABLE_VERSION: true
|
||||
|
||||
jobs:
|
||||
upgrade-cpe-dictionary-index:
|
||||
runs-on: ubuntu-latest
|
||||
if: github.repository == 'anchore/syft' # only run for main repo
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- uses: actions/setup-go@v4
|
||||
with:
|
||||
go-version: ${{ env.GO_VERSION }}
|
||||
stable: ${{ env.GO_STABLE_VERSION }}
|
||||
|
||||
- run: |
|
||||
make generate-cpe-dictionary-index
|
||||
|
||||
- uses: tibdex/github-app-token@v1
|
||||
id: generate-token
|
||||
with:
|
||||
app_id: ${{ secrets.TOKEN_APP_ID }}
|
||||
private_key: ${{ secrets.TOKEN_APP_PRIVATE_KEY }}
|
||||
|
||||
- uses: peter-evans/create-pull-request@v5
|
||||
with:
|
||||
signoff: true
|
||||
delete-branch: true
|
||||
branch: auto/latest-cpe-dictionary-index
|
||||
labels: dependencies
|
||||
commit-message: "chore(deps): update CPE dictionary index"
|
||||
title: "chore(deps): update CPE dictionary index"
|
||||
body: |
|
||||
Update CPE dictionary index based on the latest available CPE dictionary
|
||||
token: ${{ steps.generate-token.outputs.token }}
|
|
@ -6,7 +6,7 @@ on:
|
|||
workflow_dispatch:
|
||||
|
||||
env:
|
||||
GO_VERSION: "1.19.x"
|
||||
GO_VERSION: "1.20.x"
|
||||
GO_STABLE_VERSION: true
|
||||
|
||||
jobs:
|
||||
|
|
7
Makefile
7
Makefile
|
@ -298,7 +298,7 @@ compare-test-rpm-package-install: $(TEMP_DIR) $(SNAPSHOT_DIR)
|
|||
$(TEMP_DIR)
|
||||
|
||||
|
||||
## Code generation targets #################################
|
||||
## Code and data generation targets #################################
|
||||
|
||||
.PHONY: generate-json-schema
|
||||
generate-json-schema: ## Generate a new json schema
|
||||
|
@ -309,6 +309,11 @@ generate-license-list: ## Generate an updated spdx license list
|
|||
go generate ./internal/spdxlicense/...
|
||||
gofmt -s -w ./internal/spdxlicense
|
||||
|
||||
.PHONY: generate-cpe-dictionary-index
|
||||
generate-cpe-dictionary-index: ## Build the CPE index based off of the latest available CPE dictionary
|
||||
$(call title,Building CPE index)
|
||||
go generate ./syft/pkg/cataloger/common/cpe/dictionary
|
||||
|
||||
|
||||
## Build-related targets #################################
|
||||
|
||||
|
|
|
@ -76,7 +76,14 @@ func runCataloger(cataloger pkg.Cataloger, resolver file.Resolver) (catalogerRes
|
|||
for _, p := range packages {
|
||||
// generate CPEs (note: this is excluded from package ID, so is safe to mutate)
|
||||
// we might have binary classified CPE already with the package so we want to append here
|
||||
|
||||
dictionaryCPE, ok := cpe.DictionaryFind(p)
|
||||
if ok {
|
||||
log.Debugf("used CPE dictionary to find CPE for %s package %q: %s", p.Type, p.Name, dictionaryCPE.BindToFmtString())
|
||||
p.CPEs = append(p.CPEs, dictionaryCPE)
|
||||
} else {
|
||||
p.CPEs = append(p.CPEs, cpe.Generate(p)...)
|
||||
}
|
||||
|
||||
// if we were not able to identify the language we have an opportunity
|
||||
// to try and get this value from the PURL. Worst case we assert that
|
||||
|
|
1296
syft/pkg/cataloger/common/cpe/dictionary/data/cpe-index.json
Normal file
1296
syft/pkg/cataloger/common/cpe/dictionary/data/cpe-index.json
Normal file
File diff suppressed because it is too large
Load diff
|
@ -0,0 +1,3 @@
|
|||
package dictionary
|
||||
|
||||
//go:generate go run ./index-generator/ -o data/cpe-index.json
|
|
@ -0,0 +1,230 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"compress/gzip"
|
||||
"encoding/json"
|
||||
"encoding/xml"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"strings"
|
||||
|
||||
"github.com/facebookincubator/nvdtools/wfn"
|
||||
"golang.org/x/exp/slices"
|
||||
|
||||
"github.com/anchore/syft/syft/pkg/cataloger/common/cpe/dictionary"
|
||||
)
|
||||
|
||||
func generateIndexedDictionaryJSON(rawGzipData io.Reader) ([]byte, error) {
|
||||
gzipReader, err := gzip.NewReader(rawGzipData)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to decompress CPE dictionary: %w", err)
|
||||
}
|
||||
defer gzipReader.Close()
|
||||
|
||||
// Read XML data
|
||||
data, err := io.ReadAll(gzipReader)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to read CPE dictionary: %w", err)
|
||||
}
|
||||
|
||||
// Unmarshal XML
|
||||
var cpeList CpeList
|
||||
if err := xml.Unmarshal(data, &cpeList); err != nil {
|
||||
return nil, fmt.Errorf("unable to unmarshal CPE dictionary XML: %w", err)
|
||||
}
|
||||
|
||||
// Filter out data that's not applicable here
|
||||
cpeList = filterCpeList(cpeList)
|
||||
|
||||
// Create indexed dictionary to help with looking up CPEs
|
||||
indexedDictionary := indexCPEList(cpeList)
|
||||
|
||||
// Convert to JSON
|
||||
jsonData, err := json.MarshalIndent(indexedDictionary, "", " ")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to marshal CPE dictionary to JSON: %w", err)
|
||||
}
|
||||
return jsonData, nil
|
||||
}
|
||||
|
||||
// filterCpeList removes CPE items that are not applicable to software packages.
|
||||
func filterCpeList(cpeList CpeList) CpeList {
|
||||
var processedCpeList CpeList
|
||||
|
||||
seen := make(map[string]struct{})
|
||||
|
||||
for _, cpeItem := range cpeList.CpeItems {
|
||||
// Skip CPE items that don't have any references.
|
||||
if len(cpeItem.References) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
// Skip CPE items where the CPE URI doesn't meet our criteria.
|
||||
parsedName, err := wfn.Parse(cpeItem.Name)
|
||||
if err != nil {
|
||||
log.Printf("unable to parse CPE URI %q: %s", cpeItem.Name, err)
|
||||
}
|
||||
|
||||
if slices.Contains([]string{"h", "o"}, parsedName.Part) {
|
||||
continue
|
||||
}
|
||||
|
||||
normalizedName := normalizeCPE(parsedName).BindToURI()
|
||||
if _, ok := seen[normalizedName]; ok {
|
||||
continue
|
||||
}
|
||||
seen[normalizedName] = struct{}{}
|
||||
cpeItem.Name = normalizedName
|
||||
|
||||
parsedCPE, err := wfn.Parse(cpeItem.Cpe23Item.Name)
|
||||
if err != nil {
|
||||
log.Printf("unable to parse CPE value %q: %s", cpeItem.Cpe23Item.Name, err)
|
||||
}
|
||||
|
||||
cpeItem.Cpe23Item.Name = normalizeCPE(parsedCPE).BindToFmtString()
|
||||
|
||||
processedCpeList.CpeItems = append(processedCpeList.CpeItems, cpeItem)
|
||||
}
|
||||
|
||||
return processedCpeList
|
||||
}
|
||||
|
||||
// normalizeCPE removes the version and update parts of a CPE.
|
||||
func normalizeCPE(cpe *wfn.Attributes) *wfn.Attributes {
|
||||
cpeCopy := *cpe
|
||||
|
||||
cpeCopy.Version = ""
|
||||
cpeCopy.Update = ""
|
||||
|
||||
return &cpeCopy
|
||||
}
|
||||
|
||||
// URL prefixes that indexCPEList matches against a CPE item's reference links
// to decide which ecosystem-specific addEntryFor* function handles the link.
const (
|
||||
prefixForNPMPackages = "https://www.npmjs.com/package/"
|
||||
prefixForRubyGems = "https://rubygems.org/gems/"
|
||||
prefixForRubyGemsHTTP = "http://rubygems.org/gems/"
|
||||
prefixForNativeRubyGems = "https://github.com/ruby/"
|
||||
prefixForPyPIPackages = "https://pypi.org/project/"
|
||||
prefixForJenkinsPlugins = "https://github.com/jenkinsci/"
|
||||
prefixForRustCrates = "https://crates.io/crates/"
|
||||
)
|
||||
|
||||
// indexCPEList creates an index of CPEs by ecosystem.
|
||||
func indexCPEList(list CpeList) *dictionary.Indexed {
|
||||
indexed := &dictionary.Indexed{
|
||||
EcosystemPackages: make(map[string]dictionary.Packages),
|
||||
}
|
||||
|
||||
for _, cpeItem := range list.CpeItems {
|
||||
cpeItemName := cpeItem.Cpe23Item.Name
|
||||
|
||||
for _, reference := range cpeItem.References {
|
||||
ref := reference.Reference.Href
|
||||
|
||||
switch {
|
||||
case strings.HasPrefix(ref, prefixForNPMPackages):
|
||||
addEntryForNPMPackage(indexed, ref, cpeItemName)
|
||||
|
||||
case strings.HasPrefix(ref, prefixForRubyGems), strings.HasPrefix(ref, prefixForRubyGemsHTTP):
|
||||
addEntryForRubyGem(indexed, ref, cpeItemName)
|
||||
|
||||
case strings.HasPrefix(ref, prefixForNativeRubyGems):
|
||||
addEntryForNativeRubyGem(indexed, ref, cpeItemName)
|
||||
|
||||
case strings.HasPrefix(ref, prefixForPyPIPackages):
|
||||
addEntryForPyPIPackage(indexed, ref, cpeItemName)
|
||||
|
||||
case strings.HasPrefix(ref, prefixForJenkinsPlugins):
|
||||
// It _might_ be a jenkins plugin!
|
||||
addEntryForJenkinsPlugin(indexed, ref, cpeItemName)
|
||||
|
||||
case strings.HasPrefix(ref, prefixForRustCrates):
|
||||
addEntryForRustCrate(indexed, ref, cpeItemName)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return indexed
|
||||
}
|
||||
|
||||
func addEntryForRustCrate(indexed *dictionary.Indexed, ref string, cpeItemName string) {
|
||||
// Prune off the non-package-name parts of the URL
|
||||
ref = strings.TrimPrefix(ref, prefixForRustCrates)
|
||||
ref = strings.Split(ref, "/")[0]
|
||||
|
||||
if _, ok := indexed.EcosystemPackages[dictionary.EcosystemRustCrates]; !ok {
|
||||
indexed.EcosystemPackages[dictionary.EcosystemRustCrates] = make(dictionary.Packages)
|
||||
}
|
||||
|
||||
indexed.EcosystemPackages[dictionary.EcosystemRustCrates][ref] = cpeItemName
|
||||
}
|
||||
|
||||
func addEntryForJenkinsPlugin(indexed *dictionary.Indexed, ref string, cpeItemName string) {
|
||||
// Prune off the non-package-name parts of the URL
|
||||
ref = strings.TrimPrefix(ref, prefixForJenkinsPlugins)
|
||||
ref = strings.Split(ref, "/")[0]
|
||||
|
||||
if !strings.HasSuffix(ref, "-plugin") {
|
||||
// It's not a jenkins plugin!
|
||||
return
|
||||
}
|
||||
|
||||
ref = strings.TrimSuffix(ref, "-plugin")
|
||||
|
||||
if _, ok := indexed.EcosystemPackages[dictionary.EcosystemJenkinsPlugins]; !ok {
|
||||
indexed.EcosystemPackages[dictionary.EcosystemJenkinsPlugins] = make(dictionary.Packages)
|
||||
}
|
||||
|
||||
indexed.EcosystemPackages[dictionary.EcosystemJenkinsPlugins][ref] = cpeItemName
|
||||
}
|
||||
|
||||
func addEntryForPyPIPackage(indexed *dictionary.Indexed, ref string, cpeItemName string) {
|
||||
// Prune off the non-package-name parts of the URL
|
||||
ref = strings.TrimPrefix(ref, prefixForPyPIPackages)
|
||||
ref = strings.Split(ref, "/")[0]
|
||||
|
||||
if _, ok := indexed.EcosystemPackages[dictionary.EcosystemPyPI]; !ok {
|
||||
indexed.EcosystemPackages[dictionary.EcosystemPyPI] = make(dictionary.Packages)
|
||||
}
|
||||
|
||||
indexed.EcosystemPackages[dictionary.EcosystemPyPI][ref] = cpeItemName
|
||||
}
|
||||
|
||||
func addEntryForNativeRubyGem(indexed *dictionary.Indexed, ref string, cpeItemName string) {
|
||||
// Prune off the non-package-name parts of the URL
|
||||
ref = strings.TrimPrefix(ref, prefixForNativeRubyGems)
|
||||
ref = strings.Split(ref, "/")[0]
|
||||
|
||||
if _, ok := indexed.EcosystemPackages[dictionary.EcosystemRubyGems]; !ok {
|
||||
indexed.EcosystemPackages[dictionary.EcosystemRubyGems] = make(dictionary.Packages)
|
||||
}
|
||||
|
||||
indexed.EcosystemPackages[dictionary.EcosystemRubyGems][ref] = cpeItemName
|
||||
}
|
||||
|
||||
func addEntryForRubyGem(indexed *dictionary.Indexed, ref string, cpeItemName string) {
|
||||
// Prune off the non-package-name parts of the URL
|
||||
ref = strings.TrimPrefix(ref, prefixForRubyGems)
|
||||
ref = strings.TrimPrefix(ref, prefixForRubyGemsHTTP)
|
||||
ref = strings.Split(ref, "/")[0]
|
||||
|
||||
if _, ok := indexed.EcosystemPackages[dictionary.EcosystemRubyGems]; !ok {
|
||||
indexed.EcosystemPackages[dictionary.EcosystemRubyGems] = make(dictionary.Packages)
|
||||
}
|
||||
|
||||
indexed.EcosystemPackages[dictionary.EcosystemRubyGems][ref] = cpeItemName
|
||||
}
|
||||
|
||||
func addEntryForNPMPackage(indexed *dictionary.Indexed, ref string, cpeItemName string) {
|
||||
// Prune off the non-package-name parts of the URL
|
||||
ref = strings.Split(ref, "/v/")[0]
|
||||
ref = strings.Split(ref, "?")[0]
|
||||
ref = strings.TrimPrefix(ref, prefixForNPMPackages)
|
||||
|
||||
if _, ok := indexed.EcosystemPackages[dictionary.EcosystemNPM]; !ok {
|
||||
indexed.EcosystemPackages[dictionary.EcosystemNPM] = make(dictionary.Packages)
|
||||
}
|
||||
|
||||
indexed.EcosystemPackages[dictionary.EcosystemNPM][ref] = cpeItemName
|
||||
}
|
|
@ -0,0 +1,169 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"compress/gzip"
|
||||
"io"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/anchore/syft/syft/pkg/cataloger/common/cpe/dictionary"
|
||||
)
|
||||
|
||||
func Test_generateIndexedDictionaryJSON(t *testing.T) {
|
||||
f, err := os.Open("testdata/official-cpe-dictionary_v2.3.xml")
|
||||
require.NoError(t, err)
|
||||
|
||||
// Create a buffer to store the gzipped data in memory
|
||||
buf := new(bytes.Buffer)
|
||||
|
||||
w := gzip.NewWriter(buf)
|
||||
_, err = io.Copy(w, f)
|
||||
require.NoError(t, err)
|
||||
|
||||
// (finalize the gzip stream)
|
||||
err = w.Close()
|
||||
require.NoError(t, err)
|
||||
|
||||
dictionaryJSON, err := generateIndexedDictionaryJSON(buf)
|
||||
assert.NoError(t, err)
|
||||
|
||||
expected, err := os.ReadFile("./testdata/expected-cpe-index.json")
|
||||
require.NoError(t, err)
|
||||
|
||||
expectedDictionaryJSONString := string(expected)
|
||||
dictionaryJSONString := string(dictionaryJSON)
|
||||
|
||||
if diff := cmp.Diff(expectedDictionaryJSONString, dictionaryJSONString); diff != "" {
|
||||
t.Errorf("generateIndexedDictionaryJSON() mismatch (-want +got):\n%s", diff)
|
||||
}
|
||||
}
|
||||
|
||||
func Test_addEntryFuncs(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
addEntryFunc func(indexed *dictionary.Indexed, ref string, cpeItemName string)
|
||||
inputRef string
|
||||
inputCpeItemName string
|
||||
expectedIndexed dictionary.Indexed
|
||||
}{
|
||||
{
|
||||
name: "addEntryForRustCrate",
|
||||
addEntryFunc: addEntryForRustCrate,
|
||||
inputRef: "https://crates.io/crates/unicycle/versions",
|
||||
inputCpeItemName: "cpe:2.3:a:unicycle_project:unicycle:*:*:*:*:*:rust:*:*",
|
||||
expectedIndexed: dictionary.Indexed{
|
||||
EcosystemPackages: map[string]dictionary.Packages{
|
||||
dictionary.EcosystemRustCrates: {
|
||||
"unicycle": "cpe:2.3:a:unicycle_project:unicycle:*:*:*:*:*:rust:*:*",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "addEntryForJenkinsPlugin",
|
||||
addEntryFunc: addEntryForJenkinsPlugin,
|
||||
inputRef: "https://github.com/jenkinsci/sonarqube-plugin",
|
||||
inputCpeItemName: "cpe:2.3:a:sonarsource:sonarqube_scanner:2.7:*:*:*:*:jenkins:*:*",
|
||||
expectedIndexed: dictionary.Indexed{
|
||||
EcosystemPackages: map[string]dictionary.Packages{
|
||||
dictionary.EcosystemJenkinsPlugins: {
|
||||
"sonarqube": "cpe:2.3:a:sonarsource:sonarqube_scanner:2.7:*:*:*:*:jenkins:*:*",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "addEntryForJenkinsPlugin: not actually a plugin",
|
||||
addEntryFunc: addEntryForJenkinsPlugin,
|
||||
inputRef: "https://github.com/jenkinsci/jenkins",
|
||||
inputCpeItemName: "cpe:2.3:a:jenkins:jenkinsci:2.7:*:*:*:*:*:*:*",
|
||||
expectedIndexed: dictionary.Indexed{
|
||||
EcosystemPackages: map[string]dictionary.Packages{},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "addEntryForPyPIPackage",
|
||||
addEntryFunc: addEntryForPyPIPackage,
|
||||
inputRef: "https://pypi.org/project/vault-cli/#history",
|
||||
inputCpeItemName: "cpe:2.3:a:vault-cli_project:vault-cli:*:*:*:*:*:python:*:*",
|
||||
expectedIndexed: dictionary.Indexed{
|
||||
EcosystemPackages: map[string]dictionary.Packages{
|
||||
dictionary.EcosystemPyPI: {
|
||||
"vault-cli": "cpe:2.3:a:vault-cli_project:vault-cli:*:*:*:*:*:python:*:*",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "addEntryForNativeRubyGem",
|
||||
addEntryFunc: addEntryForNativeRubyGem,
|
||||
inputRef: "https://github.com/ruby/openssl/releases",
|
||||
inputCpeItemName: "cpe:2.3:a:ruby-lang:openssl:-:*:*:*:*:ruby:*:*",
|
||||
expectedIndexed: dictionary.Indexed{
|
||||
EcosystemPackages: map[string]dictionary.Packages{
|
||||
dictionary.EcosystemRubyGems: {
|
||||
"openssl": "cpe:2.3:a:ruby-lang:openssl:-:*:*:*:*:ruby:*:*",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "addEntryForRubyGem: https",
|
||||
addEntryFunc: addEntryForRubyGem,
|
||||
inputRef: "https://rubygems.org/gems/actionview/versions",
|
||||
inputCpeItemName: "cpe:2.3:a:action_view_project:action_view:*:*:*:*:*:ruby:*:*",
|
||||
expectedIndexed: dictionary.Indexed{
|
||||
EcosystemPackages: map[string]dictionary.Packages{
|
||||
dictionary.EcosystemRubyGems: {
|
||||
"actionview": "cpe:2.3:a:action_view_project:action_view:*:*:*:*:*:ruby:*:*",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "addEntryForRubyGem: http",
|
||||
addEntryFunc: addEntryForRubyGem,
|
||||
inputRef: "http://rubygems.org/gems/rbovirt",
|
||||
inputCpeItemName: "cpe:2.3:a:amos_benari:rbovirt:*:*:*:*:*:ruby:*:*",
|
||||
expectedIndexed: dictionary.Indexed{
|
||||
EcosystemPackages: map[string]dictionary.Packages{
|
||||
dictionary.EcosystemRubyGems: {
|
||||
"rbovirt": "cpe:2.3:a:amos_benari:rbovirt:*:*:*:*:*:ruby:*:*",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "addEntryForNPMPackage",
|
||||
addEntryFunc: addEntryForNPMPackage,
|
||||
inputRef: "https://www.npmjs.com/package/@nubosoftware/node-static",
|
||||
inputCpeItemName: "cpe:2.3:a:\\@nubosoftware\\/node-static_project:\\@nubosoftware\\/node-static:-:*:*:*:*:node.js:*:*",
|
||||
expectedIndexed: dictionary.Indexed{
|
||||
EcosystemPackages: map[string]dictionary.Packages{
|
||||
dictionary.EcosystemNPM: {
|
||||
"@nubosoftware/node-static": "cpe:2.3:a:\\@nubosoftware\\/node-static_project:\\@nubosoftware\\/node-static:-:*:*:*:*:node.js:*:*",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
indexed := &dictionary.Indexed{
|
||||
EcosystemPackages: make(map[string]dictionary.Packages),
|
||||
}
|
||||
|
||||
tt.addEntryFunc(indexed, tt.inputRef, tt.inputCpeItemName)
|
||||
|
||||
if diff := cmp.Diff(tt.expectedIndexed, *indexed); diff != "" {
|
||||
t.Errorf("addEntry* mismatch (-want +got):\n%s", diff)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
|
@ -0,0 +1,57 @@
|
|||
// This program downloads the latest CPE dictionary from NIST and processes it into a JSON file that can be embedded into Syft for more accurate CPE results.
|
||||
package main
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
)
|
||||
|
||||
func mainE() error {
|
||||
var outputFilename string
|
||||
flag.StringVar(&outputFilename, "o", "", "file location to save CPE index")
|
||||
flag.Parse()
|
||||
|
||||
if outputFilename == "" {
|
||||
return errors.New("-o is required")
|
||||
}
|
||||
|
||||
// Download and decompress file
|
||||
fmt.Println("Fetching CPE dictionary...")
|
||||
resp, err := http.Get(cpeDictionaryURL)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to get CPE dictionary: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
fmt.Println("Generating index...")
|
||||
dictionaryJSON, err := generateIndexedDictionaryJSON(resp.Body)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Write CPE index (JSON data) to disk
|
||||
err = os.WriteFile(outputFilename, dictionaryJSON, 0600)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to write processed CPE dictionary to file: %w", err)
|
||||
}
|
||||
|
||||
fmt.Println("Done!")
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// errExit logs err as a command failure and terminates the process with exit
// status 1. It does not return.
func errExit(err error) {
	const failureExitCode = 1

	log.Printf("command failed: %s", err)
	os.Exit(failureExitCode)
}
|
||||
|
||||
func main() {
|
||||
if err := mainE(); err != nil {
|
||||
errExit(err)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,21 @@
|
|||
package main
|
||||
|
||||
// CpeItem is the XML decoding target for one "cpe-item" element of the CPE
// dictionary: its "name" attribute, its title, its reference links, and the
// "name" attribute of its "cpe23-item" child.
//
// NOTE(review): Reference is a single struct rather than a slice, so if one
// "references" element contains several "reference" children, presumably only
// one of them is retained after decoding; confirm against the dictionary
// format before relying on every link being indexed.
type CpeItem struct {
|
||||
Name string `xml:"name,attr"`
|
||||
Title string `xml:"title"`
|
||||
References []struct {
|
||||
Reference struct {
|
||||
Href string `xml:"href,attr"`
|
||||
Body string `xml:",chardata"`
|
||||
} `xml:"reference"`
|
||||
} `xml:"references"`
|
||||
Cpe23Item struct {
|
||||
Name string `xml:"name,attr"`
|
||||
} `xml:"cpe23-item"`
|
||||
}
|
||||
|
||||
// CpeList is the XML decoding target for the dictionary document as a whole;
// it collects every "cpe-item" child element as a CpeItem.
type CpeList struct {
|
||||
CpeItems []CpeItem `xml:"cpe-item"`
|
||||
}
|
||||
|
||||
// cpeDictionaryURL is the location of the gzip-compressed CPE dictionary (v2.3, XML) that the generator downloads.
const cpeDictionaryURL = "https://nvd.nist.gov/feeds/xml/cpe/dictionary/official-cpe-dictionary_v2.3.xml.gz"
|
23
syft/pkg/cataloger/common/cpe/dictionary/index-generator/testdata/expected-cpe-index.json
vendored
Normal file
23
syft/pkg/cataloger/common/cpe/dictionary/index-generator/testdata/expected-cpe-index.json
vendored
Normal file
|
@ -0,0 +1,23 @@
|
|||
{
|
||||
"ecosystems": {
|
||||
"jenkins_plugins": {
|
||||
"sonarqube": "cpe:2.3:a:sonarsource:sonarqube_scanner:*:*:*:*:*:jenkins:*:*"
|
||||
},
|
||||
"npm": {
|
||||
"merge-recursive": "cpe:2.3:a:umbraengineering:merge-recursive:*:*:*:*:*:node.js:*:*",
|
||||
"static-dev-server": "cpe:2.3:a:static-dev-server_project:static-dev-server:*:*:*:*:*:node.js:*:*",
|
||||
"umount": "cpe:2.3:a:umount_project:umount:*:*:*:*:*:node.js:*:*",
|
||||
"undefsafe": "cpe:2.3:a:undefsafe_project:undefsafe:*:*:*:*:*:node.js:*:*",
|
||||
"underscore": "cpe:2.3:a:underscorejs:underscore:*:*:*:*:*:node.js:*:*",
|
||||
"underscore-99xp": "cpe:2.3:a:underscore-99xp_project:underscore-99xp:*:*:*:*:*:node.js:*:*",
|
||||
"unicode": "cpe:2.3:a:unicode_project:unicode:*:*:*:*:*:node.js:*:*",
|
||||
"unicorn-list": "cpe:2.3:a:unicorn-list_project:unicorn-list:*:*:*:*:*:node.js:*:*"
|
||||
},
|
||||
"rubygems": {
|
||||
"openssl": "cpe:2.3:a:ruby-lang:openssl:*:*:*:*:*:*:*:*"
|
||||
},
|
||||
"rust_crates": {
|
||||
"unicycle": "cpe:2.3:a:unicycle_project:unicycle:*:*:*:*:*:rust:*:*"
|
||||
}
|
||||
}
|
||||
}
|
24876
syft/pkg/cataloger/common/cpe/dictionary/index-generator/testdata/official-cpe-dictionary_v2.3.xml
vendored
Normal file
24876
syft/pkg/cataloger/common/cpe/dictionary/index-generator/testdata/official-cpe-dictionary_v2.3.xml
vendored
Normal file
File diff suppressed because it is too large
Load diff
15
syft/pkg/cataloger/common/cpe/dictionary/types.go
Normal file
15
syft/pkg/cataloger/common/cpe/dictionary/types.go
Normal file
|
@ -0,0 +1,15 @@
|
|||
package dictionary
|
||||
|
||||
// Ecosystem names used as the top-level keys of Indexed.EcosystemPackages.
const (
|
||||
EcosystemNPM = "npm"
|
||||
EcosystemRubyGems = "rubygems"
|
||||
EcosystemPyPI = "pypi"
|
||||
EcosystemJenkinsPlugins = "jenkins_plugins"
|
||||
EcosystemRustCrates = "rust_crates"
|
||||
)
|
||||
|
||||
// Indexed is the CPE dictionary index: for each ecosystem name, the packages
// known in that ecosystem. It is serialized to JSON under the "ecosystems" key.
type Indexed struct {
|
||||
EcosystemPackages map[string]Packages `json:"ecosystems"`
|
||||
}
|
||||
|
||||
// Packages maps a package name to the CPE string recorded for that package.
type Packages map[string]string
|
|
@ -3,16 +3,21 @@ package cpe
|
|||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
_ "embed"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/facebookincubator/nvdtools/wfn"
|
||||
"github.com/scylladb/go-set/strset"
|
||||
|
||||
"github.com/anchore/syft/internal"
|
||||
"github.com/anchore/syft/internal/log"
|
||||
"github.com/anchore/syft/syft/cpe"
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
"github.com/anchore/syft/syft/pkg/cataloger/common/cpe/dictionary"
|
||||
)
|
||||
|
||||
// knownVendors contains vendor strings that are known to exist in
|
||||
|
@ -32,6 +37,77 @@ func newCPE(product, vendor, version, targetSW string) *wfn.Attributes {
|
|||
return &c
|
||||
}
|
||||
|
||||
//go:embed dictionary/data/cpe-index.json
|
||||
var indexedCPEDictionaryData []byte
|
||||
|
||||
var indexedCPEDictionary *dictionary.Indexed
|
||||
var indexedCPEDictionaryOnce sync.Once
|
||||
|
||||
func GetIndexedDictionary() (_ *dictionary.Indexed, err error) {
|
||||
indexedCPEDictionaryOnce.Do(func() {
|
||||
err = json.Unmarshal(indexedCPEDictionaryData, &indexedCPEDictionary)
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
if indexedCPEDictionary == nil {
|
||||
err = fmt.Errorf("failed to unmarshal indexed CPE dictionary")
|
||||
return
|
||||
}
|
||||
|
||||
return indexedCPEDictionary, err
|
||||
}
|
||||
|
||||
func DictionaryFind(p pkg.Package) (cpe.CPE, bool) {
|
||||
dict, err := GetIndexedDictionary()
|
||||
if err != nil {
|
||||
log.Debugf("dictionary CPE lookup not available: %+v", err)
|
||||
return cpe.CPE{}, false
|
||||
}
|
||||
|
||||
var (
|
||||
cpeString string
|
||||
ok bool
|
||||
)
|
||||
|
||||
switch p.Type {
|
||||
case pkg.NpmPkg:
|
||||
cpeString, ok = dict.EcosystemPackages[dictionary.EcosystemNPM][p.Name]
|
||||
|
||||
case pkg.GemPkg:
|
||||
cpeString, ok = dict.EcosystemPackages[dictionary.EcosystemRubyGems][p.Name]
|
||||
|
||||
case pkg.PythonPkg:
|
||||
cpeString, ok = dict.EcosystemPackages[dictionary.EcosystemPyPI][p.Name]
|
||||
|
||||
case pkg.JenkinsPluginPkg:
|
||||
cpeString, ok = dict.EcosystemPackages[dictionary.EcosystemJenkinsPlugins][p.Name]
|
||||
|
||||
case pkg.RustPkg:
|
||||
cpeString, ok = dict.EcosystemPackages[dictionary.EcosystemRustCrates][p.Name]
|
||||
|
||||
default:
|
||||
// The dictionary doesn't support this package type yet.
|
||||
return cpe.CPE{}, false
|
||||
}
|
||||
|
||||
if !ok {
|
||||
// The dictionary doesn't have a CPE for this package.
|
||||
return cpe.CPE{}, false
|
||||
}
|
||||
|
||||
parsedCPE, err := cpe.New(cpeString)
|
||||
if err != nil {
|
||||
return cpe.CPE{}, false
|
||||
}
|
||||
|
||||
parsedCPE.Version = p.Version
|
||||
|
||||
return parsedCPE, true
|
||||
}
|
||||
|
||||
// Generate Create a list of CPEs for a given package, trying to guess the vendor, product tuple. We should be trying to
|
||||
// generate the minimal set of representative CPEs, which implies that optional fields should not be included
|
||||
// (such as target SW).
|
||||
|
|
|
@ -969,3 +969,33 @@ func Test_addSeparatorVariations(t *testing.T) {
|
|||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestDictionaryFindIsWired(t *testing.T) {
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
pkg pkg.Package
|
||||
want string
|
||||
wantExists bool
|
||||
}{
|
||||
{
|
||||
name: "sanity check that cpe data is wired up",
|
||||
pkg: pkg.Package{
|
||||
Name: "openssl",
|
||||
Version: "1.0.2k",
|
||||
Type: pkg.GemPkg,
|
||||
},
|
||||
want: "cpe:2.3:a:ruby-lang:openssl:1.0.2k:*:*:*:*:*:*:*",
|
||||
// without the cpe data wired up, this would be empty (generation also creates cpe:2.3:a:openssl:openssl:1.0.2k:*:*:*:*:*:*:*)
|
||||
wantExists: true,
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got, gotExists := DictionaryFind(tt.pkg)
|
||||
|
||||
assert.Equal(t, tt.want, got.BindToFmtString())
|
||||
assert.Equal(t, tt.wantExists, gotExists)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue