mirror of
https://github.com/anchore/syft
synced 2024-11-10 06:14:16 +00:00
Introduce indexed embedded CPE dictionary (#1897)
* Introduce indexed embedded CPE dictionary Signed-off-by: Dan Luhring <dluhring@chainguard.dev> * Don't generate cpe-index on make snapshot Signed-off-by: Dan Luhring <dluhring@chainguard.dev> * Add unit tests for individual addEntry funcs Signed-off-by: Dan Luhring <dluhring@chainguard.dev> * migrate CPE index build to go generate and add periodic workflow Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com> * add test to ensure generated cpe index is wired up to function that uses it Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com> --------- Signed-off-by: Dan Luhring <dluhring@chainguard.dev> Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com> Co-authored-by: Alex Goodman <wagoodman@users.noreply.github.com>
This commit is contained in:
parent
3f5c601620
commit
99d172f0d1
16 changed files with 26855 additions and 4 deletions
2
.github/workflows/update-bootstrap-tools.yml
vendored
2
.github/workflows/update-bootstrap-tools.yml
vendored
|
@ -6,7 +6,7 @@ on:
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
|
|
||||||
env:
|
env:
|
||||||
GO_VERSION: "1.19.x"
|
GO_VERSION: "1.20.x"
|
||||||
GO_STABLE_VERSION: true
|
GO_STABLE_VERSION: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
|
|
43
.github/workflows/update-cpe-dictionary-index.yml
vendored
Normal file
43
.github/workflows/update-cpe-dictionary-index.yml
vendored
Normal file
|
@ -0,0 +1,43 @@
|
||||||
|
name: PR to update CPE dictionary index
|
||||||
|
on:
|
||||||
|
schedule:
|
||||||
|
- cron: "0 1 * * 1" # every monday at 1 AM
|
||||||
|
|
||||||
|
workflow_dispatch:
|
||||||
|
|
||||||
|
env:
|
||||||
|
GO_VERSION: "1.20.x"
|
||||||
|
GO_STABLE_VERSION: true
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
upgrade-cpe-dictionary-index:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
if: github.repository == 'anchore/syft' # only run for main repo
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v3
|
||||||
|
|
||||||
|
- uses: actions/setup-go@v4
|
||||||
|
with:
|
||||||
|
go-version: ${{ env.GO_VERSION }}
|
||||||
|
stable: ${{ env.GO_STABLE_VERSION }}
|
||||||
|
|
||||||
|
- run: |
|
||||||
|
make generate-cpe-dictionary-index
|
||||||
|
|
||||||
|
- uses: tibdex/github-app-token@v1
|
||||||
|
id: generate-token
|
||||||
|
with:
|
||||||
|
app_id: ${{ secrets.TOKEN_APP_ID }}
|
||||||
|
private_key: ${{ secrets.TOKEN_APP_PRIVATE_KEY }}
|
||||||
|
|
||||||
|
- uses: peter-evans/create-pull-request@v5
|
||||||
|
with:
|
||||||
|
signoff: true
|
||||||
|
delete-branch: true
|
||||||
|
branch: auto/latest-cpe-dictionary-index
|
||||||
|
labels: dependencies
|
||||||
|
commit-message: "chore(deps): update CPE dictionary index"
|
||||||
|
title: "chore(deps): update CPE dictionary index"
|
||||||
|
body: |
|
||||||
|
Update CPE dictionary index based on the latest available CPE dictionary
|
||||||
|
token: ${{ steps.generate-token.outputs.token }}
|
|
@ -6,7 +6,7 @@ on:
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
|
|
||||||
env:
|
env:
|
||||||
GO_VERSION: "1.19.x"
|
GO_VERSION: "1.20.x"
|
||||||
GO_STABLE_VERSION: true
|
GO_STABLE_VERSION: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
|
|
7
Makefile
7
Makefile
|
@ -298,7 +298,7 @@ compare-test-rpm-package-install: $(TEMP_DIR) $(SNAPSHOT_DIR)
|
||||||
$(TEMP_DIR)
|
$(TEMP_DIR)
|
||||||
|
|
||||||
|
|
||||||
## Code generation targets #################################
|
## Code and data generation targets #################################
|
||||||
|
|
||||||
.PHONY: generate-json-schema
|
.PHONY: generate-json-schema
|
||||||
generate-json-schema: ## Generate a new json schema
|
generate-json-schema: ## Generate a new json schema
|
||||||
|
@ -309,6 +309,11 @@ generate-license-list: ## Generate an updated spdx license list
|
||||||
go generate ./internal/spdxlicense/...
|
go generate ./internal/spdxlicense/...
|
||||||
gofmt -s -w ./internal/spdxlicense
|
gofmt -s -w ./internal/spdxlicense
|
||||||
|
|
||||||
|
.PHONY: generate-cpe-dictionary-index
|
||||||
|
generate-cpe-dictionary-index: ## Build the CPE index based off of the latest available CPE dictionary
|
||||||
|
$(call title,Building CPE index)
|
||||||
|
go generate ./syft/pkg/cataloger/common/cpe/dictionary
|
||||||
|
|
||||||
|
|
||||||
## Build-related targets #################################
|
## Build-related targets #################################
|
||||||
|
|
||||||
|
|
|
@ -76,7 +76,14 @@ func runCataloger(cataloger pkg.Cataloger, resolver file.Resolver) (catalogerRes
|
||||||
for _, p := range packages {
|
for _, p := range packages {
|
||||||
// generate CPEs (note: this is excluded from package ID, so is safe to mutate)
|
// generate CPEs (note: this is excluded from package ID, so is safe to mutate)
|
||||||
// we might have binary classified CPE already with the package so we want to append here
|
// we might have binary classified CPE already with the package so we want to append here
|
||||||
p.CPEs = append(p.CPEs, cpe.Generate(p)...)
|
|
||||||
|
dictionaryCPE, ok := cpe.DictionaryFind(p)
|
||||||
|
if ok {
|
||||||
|
log.Debugf("used CPE dictionary to find CPE for %s package %q: %s", p.Type, p.Name, dictionaryCPE.BindToFmtString())
|
||||||
|
p.CPEs = append(p.CPEs, dictionaryCPE)
|
||||||
|
} else {
|
||||||
|
p.CPEs = append(p.CPEs, cpe.Generate(p)...)
|
||||||
|
}
|
||||||
|
|
||||||
// if we were not able to identify the language we have an opportunity
|
// if we were not able to identify the language we have an opportunity
|
||||||
// to try and get this value from the PURL. Worst case we assert that
|
// to try and get this value from the PURL. Worst case we assert that
|
||||||
|
|
1296
syft/pkg/cataloger/common/cpe/dictionary/data/cpe-index.json
Normal file
1296
syft/pkg/cataloger/common/cpe/dictionary/data/cpe-index.json
Normal file
File diff suppressed because it is too large
Load diff
|
@ -0,0 +1,3 @@
|
||||||
|
package dictionary
|
||||||
|
|
||||||
|
//go:generate go run ./index-generator/ -o data/cpe-index.json
|
|
@ -0,0 +1,230 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"compress/gzip"
|
||||||
|
"encoding/json"
|
||||||
|
"encoding/xml"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"log"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/facebookincubator/nvdtools/wfn"
|
||||||
|
"golang.org/x/exp/slices"
|
||||||
|
|
||||||
|
"github.com/anchore/syft/syft/pkg/cataloger/common/cpe/dictionary"
|
||||||
|
)
|
||||||
|
|
||||||
|
func generateIndexedDictionaryJSON(rawGzipData io.Reader) ([]byte, error) {
|
||||||
|
gzipReader, err := gzip.NewReader(rawGzipData)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("unable to decompress CPE dictionary: %w", err)
|
||||||
|
}
|
||||||
|
defer gzipReader.Close()
|
||||||
|
|
||||||
|
// Read XML data
|
||||||
|
data, err := io.ReadAll(gzipReader)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("unable to read CPE dictionary: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Unmarshal XML
|
||||||
|
var cpeList CpeList
|
||||||
|
if err := xml.Unmarshal(data, &cpeList); err != nil {
|
||||||
|
return nil, fmt.Errorf("unable to unmarshal CPE dictionary XML: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Filter out data that's not applicable here
|
||||||
|
cpeList = filterCpeList(cpeList)
|
||||||
|
|
||||||
|
// Create indexed dictionary to help with looking up CPEs
|
||||||
|
indexedDictionary := indexCPEList(cpeList)
|
||||||
|
|
||||||
|
// Convert to JSON
|
||||||
|
jsonData, err := json.MarshalIndent(indexedDictionary, "", " ")
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("unable to marshal CPE dictionary to JSON: %w", err)
|
||||||
|
}
|
||||||
|
return jsonData, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// filterCpeList removes CPE items that are not applicable to software packages.
|
||||||
|
func filterCpeList(cpeList CpeList) CpeList {
|
||||||
|
var processedCpeList CpeList
|
||||||
|
|
||||||
|
seen := make(map[string]struct{})
|
||||||
|
|
||||||
|
for _, cpeItem := range cpeList.CpeItems {
|
||||||
|
// Skip CPE items that don't have any references.
|
||||||
|
if len(cpeItem.References) == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skip CPE items where the CPE URI doesn't meet our criteria.
|
||||||
|
parsedName, err := wfn.Parse(cpeItem.Name)
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("unable to parse CPE URI %q: %s", cpeItem.Name, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if slices.Contains([]string{"h", "o"}, parsedName.Part) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
normalizedName := normalizeCPE(parsedName).BindToURI()
|
||||||
|
if _, ok := seen[normalizedName]; ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
seen[normalizedName] = struct{}{}
|
||||||
|
cpeItem.Name = normalizedName
|
||||||
|
|
||||||
|
parsedCPE, err := wfn.Parse(cpeItem.Cpe23Item.Name)
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("unable to parse CPE value %q: %s", cpeItem.Cpe23Item.Name, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
cpeItem.Cpe23Item.Name = normalizeCPE(parsedCPE).BindToFmtString()
|
||||||
|
|
||||||
|
processedCpeList.CpeItems = append(processedCpeList.CpeItems, cpeItem)
|
||||||
|
}
|
||||||
|
|
||||||
|
return processedCpeList
|
||||||
|
}
|
||||||
|
|
||||||
|
// normalizeCPE removes the version and update parts of a CPE.
|
||||||
|
func normalizeCPE(cpe *wfn.Attributes) *wfn.Attributes {
|
||||||
|
cpeCopy := *cpe
|
||||||
|
|
||||||
|
cpeCopy.Version = ""
|
||||||
|
cpeCopy.Update = ""
|
||||||
|
|
||||||
|
return &cpeCopy
|
||||||
|
}
|
||||||
|
|
||||||
|
// URL prefixes used to recognize which package ecosystem a CPE reference
// link points at.
const (
	// npm registry package pages.
	prefixForNPMPackages = "https://www.npmjs.com/package/"

	// RubyGems gem pages, in both their https and plain http forms.
	prefixForRubyGems     = "https://rubygems.org/gems/"
	prefixForRubyGemsHTTP = "http://rubygems.org/gems/"

	// Repositories under the "ruby" GitHub organization.
	prefixForNativeRubyGems = "https://github.com/ruby/"

	// PyPI project pages.
	prefixForPyPIPackages = "https://pypi.org/project/"

	// Repositories under the "jenkinsci" GitHub organization.
	prefixForJenkinsPlugins = "https://github.com/jenkinsci/"

	// crates.io crate pages.
	prefixForRustCrates = "https://crates.io/crates/"
)
|
||||||
|
|
||||||
|
// indexCPEList creates an index of CPEs by ecosystem.
|
||||||
|
func indexCPEList(list CpeList) *dictionary.Indexed {
|
||||||
|
indexed := &dictionary.Indexed{
|
||||||
|
EcosystemPackages: make(map[string]dictionary.Packages),
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, cpeItem := range list.CpeItems {
|
||||||
|
cpeItemName := cpeItem.Cpe23Item.Name
|
||||||
|
|
||||||
|
for _, reference := range cpeItem.References {
|
||||||
|
ref := reference.Reference.Href
|
||||||
|
|
||||||
|
switch {
|
||||||
|
case strings.HasPrefix(ref, prefixForNPMPackages):
|
||||||
|
addEntryForNPMPackage(indexed, ref, cpeItemName)
|
||||||
|
|
||||||
|
case strings.HasPrefix(ref, prefixForRubyGems), strings.HasPrefix(ref, prefixForRubyGemsHTTP):
|
||||||
|
addEntryForRubyGem(indexed, ref, cpeItemName)
|
||||||
|
|
||||||
|
case strings.HasPrefix(ref, prefixForNativeRubyGems):
|
||||||
|
addEntryForNativeRubyGem(indexed, ref, cpeItemName)
|
||||||
|
|
||||||
|
case strings.HasPrefix(ref, prefixForPyPIPackages):
|
||||||
|
addEntryForPyPIPackage(indexed, ref, cpeItemName)
|
||||||
|
|
||||||
|
case strings.HasPrefix(ref, prefixForJenkinsPlugins):
|
||||||
|
// It _might_ be a jenkins plugin!
|
||||||
|
addEntryForJenkinsPlugin(indexed, ref, cpeItemName)
|
||||||
|
|
||||||
|
case strings.HasPrefix(ref, prefixForRustCrates):
|
||||||
|
addEntryForRustCrate(indexed, ref, cpeItemName)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return indexed
|
||||||
|
}
|
||||||
|
|
||||||
|
func addEntryForRustCrate(indexed *dictionary.Indexed, ref string, cpeItemName string) {
|
||||||
|
// Prune off the non-package-name parts of the URL
|
||||||
|
ref = strings.TrimPrefix(ref, prefixForRustCrates)
|
||||||
|
ref = strings.Split(ref, "/")[0]
|
||||||
|
|
||||||
|
if _, ok := indexed.EcosystemPackages[dictionary.EcosystemRustCrates]; !ok {
|
||||||
|
indexed.EcosystemPackages[dictionary.EcosystemRustCrates] = make(dictionary.Packages)
|
||||||
|
}
|
||||||
|
|
||||||
|
indexed.EcosystemPackages[dictionary.EcosystemRustCrates][ref] = cpeItemName
|
||||||
|
}
|
||||||
|
|
||||||
|
func addEntryForJenkinsPlugin(indexed *dictionary.Indexed, ref string, cpeItemName string) {
|
||||||
|
// Prune off the non-package-name parts of the URL
|
||||||
|
ref = strings.TrimPrefix(ref, prefixForJenkinsPlugins)
|
||||||
|
ref = strings.Split(ref, "/")[0]
|
||||||
|
|
||||||
|
if !strings.HasSuffix(ref, "-plugin") {
|
||||||
|
// It's not a jenkins plugin!
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
ref = strings.TrimSuffix(ref, "-plugin")
|
||||||
|
|
||||||
|
if _, ok := indexed.EcosystemPackages[dictionary.EcosystemJenkinsPlugins]; !ok {
|
||||||
|
indexed.EcosystemPackages[dictionary.EcosystemJenkinsPlugins] = make(dictionary.Packages)
|
||||||
|
}
|
||||||
|
|
||||||
|
indexed.EcosystemPackages[dictionary.EcosystemJenkinsPlugins][ref] = cpeItemName
|
||||||
|
}
|
||||||
|
|
||||||
|
func addEntryForPyPIPackage(indexed *dictionary.Indexed, ref string, cpeItemName string) {
|
||||||
|
// Prune off the non-package-name parts of the URL
|
||||||
|
ref = strings.TrimPrefix(ref, prefixForPyPIPackages)
|
||||||
|
ref = strings.Split(ref, "/")[0]
|
||||||
|
|
||||||
|
if _, ok := indexed.EcosystemPackages[dictionary.EcosystemPyPI]; !ok {
|
||||||
|
indexed.EcosystemPackages[dictionary.EcosystemPyPI] = make(dictionary.Packages)
|
||||||
|
}
|
||||||
|
|
||||||
|
indexed.EcosystemPackages[dictionary.EcosystemPyPI][ref] = cpeItemName
|
||||||
|
}
|
||||||
|
|
||||||
|
func addEntryForNativeRubyGem(indexed *dictionary.Indexed, ref string, cpeItemName string) {
|
||||||
|
// Prune off the non-package-name parts of the URL
|
||||||
|
ref = strings.TrimPrefix(ref, prefixForNativeRubyGems)
|
||||||
|
ref = strings.Split(ref, "/")[0]
|
||||||
|
|
||||||
|
if _, ok := indexed.EcosystemPackages[dictionary.EcosystemRubyGems]; !ok {
|
||||||
|
indexed.EcosystemPackages[dictionary.EcosystemRubyGems] = make(dictionary.Packages)
|
||||||
|
}
|
||||||
|
|
||||||
|
indexed.EcosystemPackages[dictionary.EcosystemRubyGems][ref] = cpeItemName
|
||||||
|
}
|
||||||
|
|
||||||
|
func addEntryForRubyGem(indexed *dictionary.Indexed, ref string, cpeItemName string) {
|
||||||
|
// Prune off the non-package-name parts of the URL
|
||||||
|
ref = strings.TrimPrefix(ref, prefixForRubyGems)
|
||||||
|
ref = strings.TrimPrefix(ref, prefixForRubyGemsHTTP)
|
||||||
|
ref = strings.Split(ref, "/")[0]
|
||||||
|
|
||||||
|
if _, ok := indexed.EcosystemPackages[dictionary.EcosystemRubyGems]; !ok {
|
||||||
|
indexed.EcosystemPackages[dictionary.EcosystemRubyGems] = make(dictionary.Packages)
|
||||||
|
}
|
||||||
|
|
||||||
|
indexed.EcosystemPackages[dictionary.EcosystemRubyGems][ref] = cpeItemName
|
||||||
|
}
|
||||||
|
|
||||||
|
func addEntryForNPMPackage(indexed *dictionary.Indexed, ref string, cpeItemName string) {
|
||||||
|
// Prune off the non-package-name parts of the URL
|
||||||
|
ref = strings.Split(ref, "/v/")[0]
|
||||||
|
ref = strings.Split(ref, "?")[0]
|
||||||
|
ref = strings.TrimPrefix(ref, prefixForNPMPackages)
|
||||||
|
|
||||||
|
if _, ok := indexed.EcosystemPackages[dictionary.EcosystemNPM]; !ok {
|
||||||
|
indexed.EcosystemPackages[dictionary.EcosystemNPM] = make(dictionary.Packages)
|
||||||
|
}
|
||||||
|
|
||||||
|
indexed.EcosystemPackages[dictionary.EcosystemNPM][ref] = cpeItemName
|
||||||
|
}
|
|
@ -0,0 +1,169 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"compress/gzip"
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/google/go-cmp/cmp"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
|
"github.com/anchore/syft/syft/pkg/cataloger/common/cpe/dictionary"
|
||||||
|
)
|
||||||
|
|
||||||
|
func Test_generateIndexedDictionaryJSON(t *testing.T) {
|
||||||
|
f, err := os.Open("testdata/official-cpe-dictionary_v2.3.xml")
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
// Create a buffer to store the gzipped data in memory
|
||||||
|
buf := new(bytes.Buffer)
|
||||||
|
|
||||||
|
w := gzip.NewWriter(buf)
|
||||||
|
_, err = io.Copy(w, f)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
// (finalize the gzip stream)
|
||||||
|
err = w.Close()
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
dictionaryJSON, err := generateIndexedDictionaryJSON(buf)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
expected, err := os.ReadFile("./testdata/expected-cpe-index.json")
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
expectedDictionaryJSONString := string(expected)
|
||||||
|
dictionaryJSONString := string(dictionaryJSON)
|
||||||
|
|
||||||
|
if diff := cmp.Diff(expectedDictionaryJSONString, dictionaryJSONString); diff != "" {
|
||||||
|
t.Errorf("generateIndexedDictionaryJSON() mismatch (-want +got):\n%s", diff)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func Test_addEntryFuncs(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
addEntryFunc func(indexed *dictionary.Indexed, ref string, cpeItemName string)
|
||||||
|
inputRef string
|
||||||
|
inputCpeItemName string
|
||||||
|
expectedIndexed dictionary.Indexed
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "addEntryForRustCrate",
|
||||||
|
addEntryFunc: addEntryForRustCrate,
|
||||||
|
inputRef: "https://crates.io/crates/unicycle/versions",
|
||||||
|
inputCpeItemName: "cpe:2.3:a:unicycle_project:unicycle:*:*:*:*:*:rust:*:*",
|
||||||
|
expectedIndexed: dictionary.Indexed{
|
||||||
|
EcosystemPackages: map[string]dictionary.Packages{
|
||||||
|
dictionary.EcosystemRustCrates: {
|
||||||
|
"unicycle": "cpe:2.3:a:unicycle_project:unicycle:*:*:*:*:*:rust:*:*",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "addEntryForJenkinsPlugin",
|
||||||
|
addEntryFunc: addEntryForJenkinsPlugin,
|
||||||
|
inputRef: "https://github.com/jenkinsci/sonarqube-plugin",
|
||||||
|
inputCpeItemName: "cpe:2.3:a:sonarsource:sonarqube_scanner:2.7:*:*:*:*:jenkins:*:*",
|
||||||
|
expectedIndexed: dictionary.Indexed{
|
||||||
|
EcosystemPackages: map[string]dictionary.Packages{
|
||||||
|
dictionary.EcosystemJenkinsPlugins: {
|
||||||
|
"sonarqube": "cpe:2.3:a:sonarsource:sonarqube_scanner:2.7:*:*:*:*:jenkins:*:*",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "addEntryForJenkinsPlugin: not actually a plugin",
|
||||||
|
addEntryFunc: addEntryForJenkinsPlugin,
|
||||||
|
inputRef: "https://github.com/jenkinsci/jenkins",
|
||||||
|
inputCpeItemName: "cpe:2.3:a:jenkins:jenkinsci:2.7:*:*:*:*:*:*:*",
|
||||||
|
expectedIndexed: dictionary.Indexed{
|
||||||
|
EcosystemPackages: map[string]dictionary.Packages{},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "addEntryForPyPIPackage",
|
||||||
|
addEntryFunc: addEntryForPyPIPackage,
|
||||||
|
inputRef: "https://pypi.org/project/vault-cli/#history",
|
||||||
|
inputCpeItemName: "cpe:2.3:a:vault-cli_project:vault-cli:*:*:*:*:*:python:*:*",
|
||||||
|
expectedIndexed: dictionary.Indexed{
|
||||||
|
EcosystemPackages: map[string]dictionary.Packages{
|
||||||
|
dictionary.EcosystemPyPI: {
|
||||||
|
"vault-cli": "cpe:2.3:a:vault-cli_project:vault-cli:*:*:*:*:*:python:*:*",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "addEntryForNativeRubyGem",
|
||||||
|
addEntryFunc: addEntryForNativeRubyGem,
|
||||||
|
inputRef: "https://github.com/ruby/openssl/releases",
|
||||||
|
inputCpeItemName: "cpe:2.3:a:ruby-lang:openssl:-:*:*:*:*:ruby:*:*",
|
||||||
|
expectedIndexed: dictionary.Indexed{
|
||||||
|
EcosystemPackages: map[string]dictionary.Packages{
|
||||||
|
dictionary.EcosystemRubyGems: {
|
||||||
|
"openssl": "cpe:2.3:a:ruby-lang:openssl:-:*:*:*:*:ruby:*:*",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "addEntryForRubyGem: https",
|
||||||
|
addEntryFunc: addEntryForRubyGem,
|
||||||
|
inputRef: "https://rubygems.org/gems/actionview/versions",
|
||||||
|
inputCpeItemName: "cpe:2.3:a:action_view_project:action_view:*:*:*:*:*:ruby:*:*",
|
||||||
|
expectedIndexed: dictionary.Indexed{
|
||||||
|
EcosystemPackages: map[string]dictionary.Packages{
|
||||||
|
dictionary.EcosystemRubyGems: {
|
||||||
|
"actionview": "cpe:2.3:a:action_view_project:action_view:*:*:*:*:*:ruby:*:*",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "addEntryForRubyGem: http",
|
||||||
|
addEntryFunc: addEntryForRubyGem,
|
||||||
|
inputRef: "http://rubygems.org/gems/rbovirt",
|
||||||
|
inputCpeItemName: "cpe:2.3:a:amos_benari:rbovirt:*:*:*:*:*:ruby:*:*",
|
||||||
|
expectedIndexed: dictionary.Indexed{
|
||||||
|
EcosystemPackages: map[string]dictionary.Packages{
|
||||||
|
dictionary.EcosystemRubyGems: {
|
||||||
|
"rbovirt": "cpe:2.3:a:amos_benari:rbovirt:*:*:*:*:*:ruby:*:*",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "addEntryForNPMPackage",
|
||||||
|
addEntryFunc: addEntryForNPMPackage,
|
||||||
|
inputRef: "https://www.npmjs.com/package/@nubosoftware/node-static",
|
||||||
|
inputCpeItemName: "cpe:2.3:a:\\@nubosoftware\\/node-static_project:\\@nubosoftware\\/node-static:-:*:*:*:*:node.js:*:*",
|
||||||
|
expectedIndexed: dictionary.Indexed{
|
||||||
|
EcosystemPackages: map[string]dictionary.Packages{
|
||||||
|
dictionary.EcosystemNPM: {
|
||||||
|
"@nubosoftware/node-static": "cpe:2.3:a:\\@nubosoftware\\/node-static_project:\\@nubosoftware\\/node-static:-:*:*:*:*:node.js:*:*",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
indexed := &dictionary.Indexed{
|
||||||
|
EcosystemPackages: make(map[string]dictionary.Packages),
|
||||||
|
}
|
||||||
|
|
||||||
|
tt.addEntryFunc(indexed, tt.inputRef, tt.inputCpeItemName)
|
||||||
|
|
||||||
|
if diff := cmp.Diff(tt.expectedIndexed, *indexed); diff != "" {
|
||||||
|
t.Errorf("addEntry* mismatch (-want +got):\n%s", diff)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,57 @@
|
||||||
|
// This program downloads the latest CPE dictionary from NIST and processes it into a JSON file that can be embedded into Syft for more accurate CPE results.
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"log"
|
||||||
|
"net/http"
|
||||||
|
"os"
|
||||||
|
)
|
||||||
|
|
||||||
|
func mainE() error {
|
||||||
|
var outputFilename string
|
||||||
|
flag.StringVar(&outputFilename, "o", "", "file location to save CPE index")
|
||||||
|
flag.Parse()
|
||||||
|
|
||||||
|
if outputFilename == "" {
|
||||||
|
return errors.New("-o is required")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Download and decompress file
|
||||||
|
fmt.Println("Fetching CPE dictionary...")
|
||||||
|
resp, err := http.Get(cpeDictionaryURL)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("unable to get CPE dictionary: %w", err)
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
fmt.Println("Generating index...")
|
||||||
|
dictionaryJSON, err := generateIndexedDictionaryJSON(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write CPE index (JSON data) to disk
|
||||||
|
err = os.WriteFile(outputFilename, dictionaryJSON, 0600)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("unable to write processed CPE dictionary to file: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Println("Done!")
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// errExit logs err as a command failure and terminates the process with exit
// status 1.
func errExit(err error) {
	// log.Fatalf writes the message through the standard logger and then calls os.Exit(1).
	log.Fatalf("command failed: %s", err)
}
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
if err := mainE(); err != nil {
|
||||||
|
errExit(err)
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,21 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
// CpeItem is the XML decoding target for one <cpe-item> element of the CPE
// dictionary.
type CpeItem struct {
	// Name is the item's "name" attribute.
	Name string `xml:"name,attr"`
	// Title is the content of the <title> child element.
	Title string `xml:"title"`
	// References holds one entry per <references> child element.
	// NOTE(review): Reference is a single struct rather than a slice, so it
	// looks like only one <reference> per <references> element is retained —
	// confirm against the dictionary XML and encoding/xml behavior.
	References []struct {
		Reference struct {
			// Href is the "href" attribute of the <reference> element.
			Href string `xml:"href,attr"`
			// Body is the character data of the <reference> element.
			Body string `xml:",chardata"`
		} `xml:"reference"`
	} `xml:"references"`
	// Cpe23Item carries the "name" attribute of the <cpe23-item> child element.
	Cpe23Item struct {
		Name string `xml:"name,attr"`
	} `xml:"cpe23-item"`
}
|
||||||
|
|
||||||
|
// CpeList is the XML decoding target for the dictionary document: it collects
// every <cpe-item> child element.
type CpeList struct {
	CpeItems []CpeItem `xml:"cpe-item"`
}
|
||||||
|
|
||||||
|
// cpeDictionaryURL is the location of the gzip-compressed CPE dictionary (XML)
// that the generator downloads.
const cpeDictionaryURL = "https://nvd.nist.gov/feeds/xml/cpe/dictionary/official-cpe-dictionary_v2.3.xml.gz"
|
23
syft/pkg/cataloger/common/cpe/dictionary/index-generator/testdata/expected-cpe-index.json
vendored
Normal file
23
syft/pkg/cataloger/common/cpe/dictionary/index-generator/testdata/expected-cpe-index.json
vendored
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
{
|
||||||
|
"ecosystems": {
|
||||||
|
"jenkins_plugins": {
|
||||||
|
"sonarqube": "cpe:2.3:a:sonarsource:sonarqube_scanner:*:*:*:*:*:jenkins:*:*"
|
||||||
|
},
|
||||||
|
"npm": {
|
||||||
|
"merge-recursive": "cpe:2.3:a:umbraengineering:merge-recursive:*:*:*:*:*:node.js:*:*",
|
||||||
|
"static-dev-server": "cpe:2.3:a:static-dev-server_project:static-dev-server:*:*:*:*:*:node.js:*:*",
|
||||||
|
"umount": "cpe:2.3:a:umount_project:umount:*:*:*:*:*:node.js:*:*",
|
||||||
|
"undefsafe": "cpe:2.3:a:undefsafe_project:undefsafe:*:*:*:*:*:node.js:*:*",
|
||||||
|
"underscore": "cpe:2.3:a:underscorejs:underscore:*:*:*:*:*:node.js:*:*",
|
||||||
|
"underscore-99xp": "cpe:2.3:a:underscore-99xp_project:underscore-99xp:*:*:*:*:*:node.js:*:*",
|
||||||
|
"unicode": "cpe:2.3:a:unicode_project:unicode:*:*:*:*:*:node.js:*:*",
|
||||||
|
"unicorn-list": "cpe:2.3:a:unicorn-list_project:unicorn-list:*:*:*:*:*:node.js:*:*"
|
||||||
|
},
|
||||||
|
"rubygems": {
|
||||||
|
"openssl": "cpe:2.3:a:ruby-lang:openssl:*:*:*:*:*:*:*:*"
|
||||||
|
},
|
||||||
|
"rust_crates": {
|
||||||
|
"unicycle": "cpe:2.3:a:unicycle_project:unicycle:*:*:*:*:*:rust:*:*"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
24876
syft/pkg/cataloger/common/cpe/dictionary/index-generator/testdata/official-cpe-dictionary_v2.3.xml
vendored
Normal file
24876
syft/pkg/cataloger/common/cpe/dictionary/index-generator/testdata/official-cpe-dictionary_v2.3.xml
vendored
Normal file
File diff suppressed because it is too large
Load diff
15
syft/pkg/cataloger/common/cpe/dictionary/types.go
Normal file
15
syft/pkg/cataloger/common/cpe/dictionary/types.go
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
package dictionary
|
||||||
|
|
||||||
|
// Ecosystem names used as the keys of Indexed.EcosystemPackages.
const (
	EcosystemNPM            = "npm"
	EcosystemRubyGems       = "rubygems"
	EcosystemPyPI           = "pypi"
	EcosystemJenkinsPlugins = "jenkins_plugins"
	EcosystemRustCrates     = "rust_crates"
)
|
||||||
|
|
||||||
|
// Indexed is the CPE dictionary index. It groups package-to-CPE mappings by
// ecosystem name and is serialized under the JSON key "ecosystems".
type Indexed struct {
	EcosystemPackages map[string]Packages `json:"ecosystems"`
}
|
||||||
|
|
||||||
|
// Packages maps a package name to the CPE string recorded for it.
type Packages map[string]string
|
|
@ -3,16 +3,21 @@ package cpe
|
||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
"bytes"
|
"bytes"
|
||||||
|
_ "embed"
|
||||||
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"sort"
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
|
"sync"
|
||||||
|
|
||||||
"github.com/facebookincubator/nvdtools/wfn"
|
"github.com/facebookincubator/nvdtools/wfn"
|
||||||
"github.com/scylladb/go-set/strset"
|
"github.com/scylladb/go-set/strset"
|
||||||
|
|
||||||
"github.com/anchore/syft/internal"
|
"github.com/anchore/syft/internal"
|
||||||
|
"github.com/anchore/syft/internal/log"
|
||||||
"github.com/anchore/syft/syft/cpe"
|
"github.com/anchore/syft/syft/cpe"
|
||||||
"github.com/anchore/syft/syft/pkg"
|
"github.com/anchore/syft/syft/pkg"
|
||||||
|
"github.com/anchore/syft/syft/pkg/cataloger/common/cpe/dictionary"
|
||||||
)
|
)
|
||||||
|
|
||||||
// knownVendors contains vendor strings that are known to exist in
|
// knownVendors contains vendor strings that are known to exist in
|
||||||
|
@ -32,6 +37,77 @@ func newCPE(product, vendor, version, targetSW string) *wfn.Attributes {
|
||||||
return &c
|
return &c
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//go:embed dictionary/data/cpe-index.json
|
||||||
|
var indexedCPEDictionaryData []byte
|
||||||
|
|
||||||
|
var indexedCPEDictionary *dictionary.Indexed
|
||||||
|
var indexedCPEDictionaryOnce sync.Once
|
||||||
|
|
||||||
|
func GetIndexedDictionary() (_ *dictionary.Indexed, err error) {
|
||||||
|
indexedCPEDictionaryOnce.Do(func() {
|
||||||
|
err = json.Unmarshal(indexedCPEDictionaryData, &indexedCPEDictionary)
|
||||||
|
})
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if indexedCPEDictionary == nil {
|
||||||
|
err = fmt.Errorf("failed to unmarshal indexed CPE dictionary")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
return indexedCPEDictionary, err
|
||||||
|
}
|
||||||
|
|
||||||
|
func DictionaryFind(p pkg.Package) (cpe.CPE, bool) {
|
||||||
|
dict, err := GetIndexedDictionary()
|
||||||
|
if err != nil {
|
||||||
|
log.Debugf("dictionary CPE lookup not available: %+v", err)
|
||||||
|
return cpe.CPE{}, false
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
cpeString string
|
||||||
|
ok bool
|
||||||
|
)
|
||||||
|
|
||||||
|
switch p.Type {
|
||||||
|
case pkg.NpmPkg:
|
||||||
|
cpeString, ok = dict.EcosystemPackages[dictionary.EcosystemNPM][p.Name]
|
||||||
|
|
||||||
|
case pkg.GemPkg:
|
||||||
|
cpeString, ok = dict.EcosystemPackages[dictionary.EcosystemRubyGems][p.Name]
|
||||||
|
|
||||||
|
case pkg.PythonPkg:
|
||||||
|
cpeString, ok = dict.EcosystemPackages[dictionary.EcosystemPyPI][p.Name]
|
||||||
|
|
||||||
|
case pkg.JenkinsPluginPkg:
|
||||||
|
cpeString, ok = dict.EcosystemPackages[dictionary.EcosystemJenkinsPlugins][p.Name]
|
||||||
|
|
||||||
|
case pkg.RustPkg:
|
||||||
|
cpeString, ok = dict.EcosystemPackages[dictionary.EcosystemRustCrates][p.Name]
|
||||||
|
|
||||||
|
default:
|
||||||
|
// The dictionary doesn't support this package type yet.
|
||||||
|
return cpe.CPE{}, false
|
||||||
|
}
|
||||||
|
|
||||||
|
if !ok {
|
||||||
|
// The dictionary doesn't have a CPE for this package.
|
||||||
|
return cpe.CPE{}, false
|
||||||
|
}
|
||||||
|
|
||||||
|
parsedCPE, err := cpe.New(cpeString)
|
||||||
|
if err != nil {
|
||||||
|
return cpe.CPE{}, false
|
||||||
|
}
|
||||||
|
|
||||||
|
parsedCPE.Version = p.Version
|
||||||
|
|
||||||
|
return parsedCPE, true
|
||||||
|
}
|
||||||
|
|
||||||
// Generate Create a list of CPEs for a given package, trying to guess the vendor, product tuple. We should be trying to
|
// Generate Create a list of CPEs for a given package, trying to guess the vendor, product tuple. We should be trying to
|
||||||
// generate the minimal set of representative CPEs, which implies that optional fields should not be included
|
// generate the minimal set of representative CPEs, which implies that optional fields should not be included
|
||||||
// (such as target SW).
|
// (such as target SW).
|
||||||
|
|
|
@ -969,3 +969,33 @@ func Test_addSeparatorVariations(t *testing.T) {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestDictionaryFindIsWired(t *testing.T) {
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
pkg pkg.Package
|
||||||
|
want string
|
||||||
|
wantExists bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "sanity check that cpe data is wired up",
|
||||||
|
pkg: pkg.Package{
|
||||||
|
Name: "openssl",
|
||||||
|
Version: "1.0.2k",
|
||||||
|
Type: pkg.GemPkg,
|
||||||
|
},
|
||||||
|
want: "cpe:2.3:a:ruby-lang:openssl:1.0.2k:*:*:*:*:*:*:*",
|
||||||
|
// without the cpe data wired up, this would be empty (generation also creates cpe:2.3:a:openssl:openssl:1.0.2k:*:*:*:*:*:*:*)
|
||||||
|
wantExists: true,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
got, gotExists := DictionaryFind(tt.pkg)
|
||||||
|
|
||||||
|
assert.Equal(t, tt.want, got.BindToFmtString())
|
||||||
|
assert.Equal(t, tt.wantExists, gotExists)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in a new issue