bug: spdx checksum empty array; allow syft to generate SHA1 for spdx-tag-value documents (#1404)

This commit is contained in:
Christopher Angelo Phillips 2022-12-19 19:10:35 -05:00 committed by GitHub
parent 8b38549b79
commit 0f1e8fca14
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
16 changed files with 967 additions and 636 deletions

2
go.mod
View file

@ -64,6 +64,7 @@ require (
github.com/sigstore/rekor v0.12.1-0.20220915152154-4bb6f441c1b2
github.com/sigstore/sigstore v1.4.4
github.com/vbatts/go-mtree v0.5.0
golang.org/x/exp v0.0.0-20220823124025-807a23277127
gopkg.in/yaml.v3 v3.0.1
)
@ -288,7 +289,6 @@ require (
go.uber.org/atomic v1.10.0 // indirect
go.uber.org/multierr v1.8.0 // indirect
go.uber.org/zap v1.23.0 // indirect
golang.org/x/exp v0.0.0-20220823124025-807a23277127 // indirect
golang.org/x/oauth2 v0.0.0-20221006150949-b44042a4b9c1 // indirect
golang.org/x/sync v0.0.0-20220929204114-8fcdb60fdcc0 // indirect
golang.org/x/sys v0.0.0-20220928140112-f11e5e49a4ec // indirect

File diff suppressed because it is too large Load diff

View file

@ -17,6 +17,15 @@ const (
DescribedByRelationship RelationshipType = "described-by"
)
// AllRelationshipTypes returns every relationship type known to syft,
// in a stable, fixed order.
func AllRelationshipTypes() []RelationshipType {
	types := make([]RelationshipType, 0, 4)
	types = append(types,
		OwnershipByFileOverlapRelationship,
		ContainsRelationship,
		DependencyOfRelationship,
		DescribedByRelationship,
	)
	return types
}
type RelationshipType string
type Relationship struct {

View file

@ -92,15 +92,11 @@ func DigestsFromFile(closer io.ReadCloser, hashes []crypto.Hash) ([]Digest, erro
writers[idx] = hashers[idx]
}
size, err := io.Copy(io.MultiWriter(writers...), closer)
_, err := io.Copy(io.MultiWriter(writers...), closer)
if err != nil {
return nil, err
}
if size == 0 {
return make([]Digest, 0), nil
}
result := make([]Digest, len(hashes))
// only capture digests when there is content. It is important to do this based on SIZE and not
// FILE TYPE. The reasoning is that it is possible for a tar to be crafted with a header-only

View file

@ -29,12 +29,6 @@ func testDigests(t testing.TB, root string, files []string, hashes ...crypto.Has
t.Fatalf("could not read %q : %+v", f, err)
}
if len(b) == 0 {
// we don't keep digests for empty files
digests[source.NewLocation(f).Coordinates] = []Digest{}
continue
}
for _, hash := range hashes {
h := hash.New()
h.Write(b)

View file

@ -1,6 +1,8 @@
//nolint:gosec // sha1 is used as a required hash function for SPDX, not a crypto function
package spdxhelpers
import (
"crypto/sha1"
"fmt"
"sort"
"strings"
@ -103,7 +105,7 @@ func ToFormatModel(s sbom.SBOM) *spdx.Document {
// Cardinality: optional, one
CreatorComment: "",
},
Packages: toPackages(s.Artifacts.PackageCatalog),
Packages: toPackages(s.Artifacts.PackageCatalog, s),
Files: toFiles(s),
Relationships: toRelationships(s.RelationshipsSorted()),
}
@ -123,7 +125,7 @@ func toSPDXID(identifiable artifact.Identifiable) common.ElementID {
// packages populates all Package Information from the package Catalog (see https://spdx.github.io/spdx-spec/3-package-information/)
//
//nolint:funlen
func toPackages(catalog *pkg.Catalog) (results []*spdx.Package) {
func toPackages(catalog *pkg.Catalog, sbom sbom.SBOM) (results []*spdx.Package) {
for _, p := range catalog.Sorted() {
// name should be guaranteed to be unique, but semantically useful and stable
id := toSPDXID(p)
@ -132,7 +134,25 @@ func toPackages(catalog *pkg.Catalog) (results []*spdx.Package) {
// in the Comments on License field (section 7.16). With respect to NOASSERTION, a written explanation in
// the Comments on License field (section 7.16) is preferred.
license := License(p)
checksums, filesAnalyzed := toPackageChecksums(p)
// two ways to get filesAnalyzed == true:
// 1. syft has generated a sha1 digest for the package itself - usually in the java cataloger
// 2. syft has generated a sha1 digest for the package's contents
packageChecksums, filesAnalyzed := toPackageChecksums(p)
packageVerificationCode := newPackageVerificationCode(p, sbom)
if packageVerificationCode != nil {
filesAnalyzed = true
}
// invalid SPDX document state
if filesAnalyzed && packageVerificationCode == nil {
// this is an invalid document state
// we reset the filesAnalyzed flag to false to avoid
// cases where a package digest was generated but there was
// not enough metadata to generate a verification code regarding the files
filesAnalyzed = false
}
results = append(results, &spdx.Package{
// NOT PART OF SPEC
@ -193,7 +213,7 @@ func toPackages(catalog *pkg.Catalog) (results []*spdx.Package) {
// 7.9: Package Verification Code
// Cardinality: optional, one if filesAnalyzed is true / omitted;
// zero (must be omitted) if filesAnalyzed is false
PackageVerificationCode: nil,
PackageVerificationCode: packageVerificationCode,
// 7.10: Package Checksum: may have keys for SHA1, SHA256 and/or MD5
// Cardinality: optional, one or many
@ -203,7 +223,7 @@ func toPackages(catalog *pkg.Catalog) (results []*spdx.Package) {
// to determine if any file in the original package has been changed. If the SPDX file is to be included
// in a package, this value should not be calculated. The SHA-1 algorithm will be used to provide the
// checksum by default.
PackageChecksums: checksums,
PackageChecksums: packageChecksums,
// 7.11: Package Home Page
// Cardinality: optional, one
@ -275,17 +295,6 @@ func toPackages(catalog *pkg.Catalog) (results []*spdx.Package) {
return results
}
// toPackageOriginator maps syft package origin information onto the SPDX
// Originator field (SPDX spec section 7.6). Returns nil when either the
// originator kind or value is empty, so the field is omitted rather than
// emitted half-populated.
func toPackageOriginator(p pkg.Package) *common.Originator {
	kind, originator := Originator(p)
	if kind == "" || originator == "" {
		return nil
	}
	return &common.Originator{
		Originator:     originator,
		OriginatorType: kind,
	}
}
func toPackageChecksums(p pkg.Package) ([]common.Checksum, bool) {
filesAnalyzed := false
var checksums []common.Checksum
@ -293,6 +302,7 @@ func toPackageChecksums(p pkg.Package) ([]common.Checksum, bool) {
// we generate digest for some Java packages
// spdx.github.io/spdx-spec/package-information/#710-package-checksum-field
case pkg.JavaMetadata:
// if syft has generated the digest here then filesAnalyzed is true
if len(meta.ArchiveDigests) > 0 {
filesAnalyzed = true
for _, digest := range meta.ArchiveDigests {
@ -304,6 +314,7 @@ func toPackageChecksums(p pkg.Package) ([]common.Checksum, bool) {
}
}
case pkg.GolangBinMetadata:
// because the H1 digest is found in the Golang metadata we cannot claim that the files were analyzed
algo, hexStr, err := util.HDigestToSHA(meta.H1Digest)
if err != nil {
log.Debugf("invalid h1digest: %s: %v", meta.H1Digest, err)
@ -318,6 +329,17 @@ func toPackageChecksums(p pkg.Package) ([]common.Checksum, bool) {
return checksums, filesAnalyzed
}
// toPackageOriginator converts a syft package's origin data into an SPDX
// Originator (spec section 7.6), or nil when either part is unknown.
func toPackageOriginator(p pkg.Package) *common.Originator {
	originatorType, value := Originator(p)
	if originatorType == "" || value == "" {
		return nil
	}
	result := common.Originator{
		Originator:     value,
		OriginatorType: originatorType,
	}
	return &result
}
func formatSPDXExternalRefs(p pkg.Package) (refs []*spdx.PackageExternalReference) {
for _, ref := range ExternalRefs(p) {
refs = append(refs, &spdx.PackageExternalReference{
@ -414,6 +436,7 @@ func toFiles(s sbom.SBOM) (results []*spdx.File) {
}
func toFileChecksums(digests []file.Digest) (checksums []common.Checksum) {
checksums = make([]common.Checksum, 0, len(digests))
for _, digest := range digests {
checksums = append(checksums, common.Checksum{
Algorithm: toChecksumAlgorithm(digest.Algorithm),
@ -462,3 +485,55 @@ func toFileTypes(metadata *source.FileMetadata) (ty []string) {
return ty
}
// TODO: handle SPDX excludes file case
// if a file is an "excludes" file, skip it /* exclude SPDX analysis file(s) */
// see: https://spdx.github.io/spdx-spec/v2.3/package-information/#79-package-verification-code-field
// the above link contains the SPDX algorithm for a package verification code

// newPackageVerificationCode computes the SPDX 7.9 package verification code
// for p: the SHA1 of the ordered concatenation of the SHA1 digests of each
// file the package "contains". Returns nil when no file SHA1 digests are
// available, in which case filesAnalyzed must remain false.
func newPackageVerificationCode(p pkg.Package, sbom sbom.SBOM) *common.PackageVerificationCode {
	// key off of the contains relationship;
	// spdx validator will fail if a package claims to contain a file but no sha1 provided
	// if a sha1 for a file is provided then the validator will fail if the package does not have
	// a package verification code
	coordinates := sbom.CoordinatesForPackage(p, artifact.ContainsRelationship)
	var digests []file.Digest
	for _, c := range coordinates {
		fileDigests := sbom.Artifacts.FileDigests[c]
		// only contribute files that actually carry a sha1 digest; previously a
		// zero-value digest was appended whenever a file had digests but no sha1,
		// padding the list with empty values and yielding a verification code
		// even when no sha1 existed at all
		for _, digest := range fileDigests {
			if digest.Algorithm == "sha1" {
				digests = append(digests, digest)
				break
			}
		}
	}

	if len(digests) == 0 {
		return nil
	}

	// sort digests in ascending order by SHA1 value, as required by the SPDX algorithm
	sort.SliceStable(digests, func(i, j int) bool {
		return digests[i].Value < digests[j].Value
	})

	// concatenate the ordered sequence of SHA1 values with no separators
	var b strings.Builder
	for _, digest := range digests {
		b.WriteString(digest.Value)
	}

	//nolint:gosec // sha1 is mandated by the SPDX spec here, not used as a crypto primitive
	hasher := sha1.New()
	_, _ = hasher.Write([]byte(b.String()))
	return &common.PackageVerificationCode{
		// 7.9.1: Package Verification Code Value
		// Cardinality: mandatory, one
		Value: fmt.Sprintf("%+x", hasher.Sum(nil)),
	}
}

View file

@ -12,6 +12,7 @@ import (
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/sbom"
"github.com/anchore/syft/syft/source"
)
@ -369,6 +370,7 @@ func Test_fileIDsForPackage(t *testing.T) {
}
func Test_H1Digest(t *testing.T) {
sbom := sbom.SBOM{}
tests := []struct {
name string
pkg pkg.Package
@ -415,7 +417,7 @@ func Test_H1Digest(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
catalog := pkg.NewCatalog(test.pkg)
pkgs := toPackages(catalog)
pkgs := toPackages(catalog, sbom)
require.Len(t, pkgs, 1)
for _, p := range pkgs {
if test.expectedDigest == "" {

View file

@ -3,14 +3,14 @@
"dataLicense": "CC0-1.0",
"SPDXID": "SPDXRef-DOCUMENT",
"name": "user-image-input",
"documentNamespace": "https://anchore.com/syft/image/user-image-input-55ad4afc-ecdc-46a4-8bc3-36b3e72da5d1",
"documentNamespace": "https://anchore.com/syft/image/user-image-input-ace88a38-4633-4bff-8fa3-8ae929dab37d",
"creationInfo": {
"licenseListVersion": "3.18",
"licenseListVersion": "3.19",
"creators": [
"Organization: Anchore, Inc",
"Tool: syft-v0.42.0-bogus"
],
"created": "2022-11-19T13:46:57Z",
"created": "2022-12-14T18:21:40Z",
"comment": ""
},
"packages": [
@ -70,7 +70,7 @@
"fileTypes": [
"OTHER"
],
"checksums": null,
"checksums": [],
"licenseConcluded": "NOASSERTION",
"copyrightText": ""
},
@ -80,7 +80,7 @@
"fileTypes": [
"OTHER"
],
"checksums": null,
"checksums": [],
"licenseConcluded": "NOASSERTION",
"copyrightText": ""
},
@ -90,7 +90,7 @@
"fileTypes": [
"OTHER"
],
"checksums": null,
"checksums": [],
"licenseConcluded": "NOASSERTION",
"copyrightText": ""
},
@ -100,7 +100,7 @@
"fileTypes": [
"OTHER"
],
"checksums": null,
"checksums": [],
"licenseConcluded": "NOASSERTION",
"copyrightText": ""
},
@ -110,7 +110,7 @@
"fileTypes": [
"OTHER"
],
"checksums": null,
"checksums": [],
"licenseConcluded": "NOASSERTION",
"copyrightText": ""
},
@ -120,7 +120,7 @@
"fileTypes": [
"OTHER"
],
"checksums": null,
"checksums": [],
"licenseConcluded": "NOASSERTION",
"copyrightText": ""
}

View file

@ -3,6 +3,8 @@ package sbom
import (
"sort"
"golang.org/x/exp/slices"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/linux"
@ -65,6 +67,36 @@ func (s SBOM) AllCoordinates() []source.Coordinates {
return set.ToSlice()
}
// RelationshipsForPackage returns all relationships for the provided types.
// If no types are provided, all relationships for the package are returned.
func (s SBOM) RelationshipsForPackage(p pkg.Package, rt ...artifact.RelationshipType) []artifact.Relationship {
	if len(rt) == 0 {
		rt = artifact.AllRelationshipTypes()
	}

	var relationships []artifact.Relationship
	for _, relationship := range s.Relationships {
		// consider only relationships that originate from the package and
		// match one of the requested types (rt is inclusive)
		if relationship.From.ID() != p.ID() {
			continue
		}
		if slices.Contains(rt, relationship.Type) {
			relationships = append(relationships, relationship)
		}
	}

	return relationships
}
// CoordinatesForPackage returns all coordinates for the provided package for provided relationship types
// If no types are provided, all relationship types are considered.
func (s SBOM) CoordinatesForPackage(p pkg.Package, rt ...artifact.RelationshipType) []source.Coordinates {
	var results []source.Coordinates
	for _, relationship := range s.RelationshipsForPackage(p, rt...) {
		results = append(results, extractCoordinates(relationship)...)
	}
	return results
}
func extractCoordinates(relationship artifact.Relationship) (results []source.Coordinates) {
if coordinates, exists := relationship.From.(source.Coordinates); exists {
results = append(results, coordinates)

View file

@ -1,8 +1,6 @@
package cli
import (
"bufio"
"io"
"net/http"
"os"
"os/exec"
@ -15,30 +13,6 @@ import (
"github.com/stretchr/testify/require"
)
// runAndShow starts cmd and streams its stdout and stderr lines into the
// test log, failing the test if the pipes cannot be created or the command
// cannot be started.
func runAndShow(t *testing.T, cmd *exec.Cmd) {
	t.Helper()

	// fix: the format strings were missing the '%' verb ("+v" instead of "%+v"),
	// so the error value was never rendered in the failure message
	stderr, err := cmd.StderrPipe()
	require.NoErrorf(t, err, "could not get stderr: %+v", err)

	stdout, err := cmd.StdoutPipe()
	require.NoErrorf(t, err, "could not get stdout: %+v", err)

	err = cmd.Start()
	require.NoErrorf(t, err, "failed to start cmd: %+v", err)

	show := func(label string, reader io.ReadCloser) {
		scanner := bufio.NewScanner(reader)
		scanner.Split(bufio.ScanLines)
		for scanner.Scan() {
			t.Logf("%s: %s", label, scanner.Text())
		}
	}

	show("out", stdout)
	show("err", stderr)
}
func TestCosignWorkflow(t *testing.T) {
// found under test-fixtures/registry/Makefile
img := "localhost:5000/attest:latest"
@ -113,7 +87,6 @@ func TestCosignWorkflow(t *testing.T) {
cmd = exec.Command("make", "push")
cmd.Dir = fixturesPath
runAndShow(t, cmd)
},
cleanup: func() {
cwd, err := os.Getwd()

View file

@ -15,7 +15,6 @@ import (
const spdxJsonSchemaPath = "schema/spdx-json"
func TestSPDXJSONSchema(t *testing.T) {
imageFixture := func(t *testing.T) string {
fixtureImageName := "image-pkg-coverage"
imagetest.GetFixtureImage(t, "docker-archive", fixtureImageName)
@ -35,11 +34,6 @@ func TestSPDXJSONSchema(t *testing.T) {
args: []string{"-o", "spdx-json"},
fixture: imageFixture,
},
{
name: "power-user:image:docker-archive:pkg-coverage",
subcommand: "power-user",
fixture: imageFixture,
},
{
name: "packages:dir:pkg-coverage",
subcommand: "packages",

View file

@ -0,0 +1,83 @@
package cli
import (
"fmt"
"os"
"os/exec"
"path"
"path/filepath"
"testing"
"github.com/stretchr/testify/require"
)
// TestSpdxValidationTooling generates SPDX tag-value documents for a set of
// images and validates each one against the official SPDX java tooling
// (run inside a locally-built docker image; see test-fixtures/image-java-spdx-tools).
func TestSpdxValidationTooling(t *testing.T) {
	tests := []struct {
		name       string
		syftArgs   []string
		images     []string
		setup      func(t *testing.T)
		env        map[string]string
		assertions []traitAssertion
	}{
		{
			name:     "spdx validation tooling tag value",
			syftArgs: []string{"packages", "-o", "spdx"},
			images:   []string{"alpine:latest", "photon:3.0", "debian:latest"},
			env: map[string]string{
				"SYFT_FILE_METADATA_CATALOGER_ENABLED": "true",
				"SYFT_FILE_METADATA_DIGESTS":           "sha1",
			},
			setup: func(t *testing.T) {
				cwd, err := os.Getwd()
				require.NoError(t, err)

				fixturesPath := filepath.Join(cwd, "test-fixtures", "image-java-spdx-tools")
				buildCmd := exec.Command("make", "build")
				buildCmd.Dir = fixturesPath
				err = buildCmd.Run()
				require.NoError(t, err)
			},
			assertions: []traitAssertion{
				assertSuccessfulReturnCode,
			},
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			// build the validation image
			test.setup(t)

			for _, image := range test.images {
				args := append(test.syftArgs, image)
				cmd, stdout, stderr := runSyft(t, test.env, args...)
				for _, traitFn := range test.assertions {
					traitFn(t, stdout, stderr, cmd.ProcessState.ExitCode())
				}

				cwd, err := os.Getwd()
				require.NoError(t, err)

				f, err := os.CreateTemp(t.TempDir(), "temp")
				require.NoError(t, err)

				// write the document and close the handle BEFORE renaming:
				// previously the file was renamed while still open and then
				// written through the stale handle, which is not portable
				// (renaming an open file fails on Windows) and leaked the fd
				_, err = f.Write([]byte(stdout))
				require.NoError(t, err)
				require.NoError(t, f.Close())

				// spdx tooling only takes a file with suffix spdx
				rename := path.Join(path.Dir(f.Name()), fmt.Sprintf("%s.spdx", path.Base(f.Name())))
				err = os.Rename(f.Name(), rename)
				require.NoError(t, err)

				// validate against spdx java tooling
				fileArg := fmt.Sprintf("FILE=%s", rename)
				mountArg := fmt.Sprintf("BASE=%s", path.Base(rename))

				validateCmd := exec.Command("make", "validate", fileArg, mountArg)
				validateCmd.Dir = filepath.Join(cwd, "test-fixtures", "image-java-spdx-tools")
				runAndShow(t, validateCmd)
			}
		})
	}
}

View file

@ -0,0 +1,7 @@
FROM cgr.dev/chainguard/jdk
RUN wget https://github.com/spdx/tools-java/releases/download/v1.1.3/tools-java-1.1.3.zip && \
unzip tools-java-1.1.3.zip && \
rm tools-java-1.1.3.zip
ENTRYPOINT ["java", "-jar", "tools-java-1.1.3-jar-with-dependencies.jar"]

View file

@ -0,0 +1,8 @@
all: build validate
.PHONY: build
build:
docker build -t spdx-java-tools:latest .
validate:
docker run --rm -v ${FILE}:/home/build/${BASE} spdx-java-tools:latest Verify ${BASE}

View file

@ -1,8 +1,10 @@
package cli
import (
"bufio"
"bytes"
"fmt"
"io"
"math"
"os"
"os/exec"
@ -14,9 +16,35 @@ import (
"testing"
"time"
"github.com/stretchr/testify/require"
"github.com/anchore/stereoscope/pkg/imagetest"
)
// runAndShow starts cmd and streams its stdout and stderr lines into the
// test log, failing the test if the pipes cannot be created or the command
// cannot be started.
func runAndShow(t *testing.T, cmd *exec.Cmd) {
	t.Helper()

	// fix: the format strings were missing the '%' verb ("+v" instead of "%+v"),
	// so the error value was never rendered in the failure message
	stderr, err := cmd.StderrPipe()
	require.NoErrorf(t, err, "could not get stderr: %+v", err)

	stdout, err := cmd.StdoutPipe()
	require.NoErrorf(t, err, "could not get stdout: %+v", err)

	err = cmd.Start()
	require.NoErrorf(t, err, "failed to start cmd: %+v", err)

	show := func(label string, reader io.ReadCloser) {
		scanner := bufio.NewScanner(reader)
		scanner.Split(bufio.ScanLines)
		for scanner.Scan() {
			t.Logf("%s: %s", label, scanner.Text())
		}
	}

	show("out", stdout)
	show("err", stderr)
}
func setupPKI(t *testing.T, pw string) func() {
err := os.Setenv("COSIGN_PASSWORD", pw)
if err != nil {