add python author and email fields + add metadata type

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
This commit is contained in:
Alex Goodman 2020-10-21 10:27:20 -04:00
parent 0ce8701e73
commit 7fc926d40d
No known key found for this signature in database
GPG key ID: 5CB45AE22BAB7EA7
23 changed files with 302 additions and 158 deletions

View file

@ -49,11 +49,12 @@ func parseApkDB(_ string, reader io.Reader) ([]pkg.Package, error) {
}
if metadata != nil {
packages = append(packages, pkg.Package{
Name: metadata.Package,
Version: metadata.Version,
Licenses: strings.Split(metadata.License, " "),
Type: pkg.ApkPkg,
Metadata: *metadata,
Name: metadata.Package,
Version: metadata.Version,
Licenses: strings.Split(metadata.License, " "),
Type: pkg.ApkPkg,
MetadataType: pkg.ApkMetadataType,
Metadata: *metadata,
})
}
}

View file

@ -30,10 +30,11 @@ func parseDpkgStatus(_ string, reader io.Reader) ([]pkg.Package, error) {
return nil, err
}
packages = append(packages, pkg.Package{
Name: entry.Package,
Version: entry.Version,
Type: pkg.DebPkg,
Metadata: entry,
Name: entry.Package,
Version: entry.Version,
Type: pkg.DebPkg,
MetadataType: pkg.DpkgMetadataType,
Metadata: entry,
})
}

View file

@ -142,10 +142,11 @@ func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) {
}
return &pkg.Package{
Name: selectName(manifest, j.fileInfo),
Version: selectVersion(manifest, j.fileInfo),
Language: pkg.Java,
Type: pkg.JavaPkg,
Name: selectName(manifest, j.fileInfo),
Version: selectVersion(manifest, j.fileInfo),
Language: pkg.Java,
Type: pkg.JavaPkg,
MetadataType: pkg.JavaMetadataType,
Metadata: pkg.JavaMetadata{
Manifest: manifest,
},
@ -177,10 +178,11 @@ func (j *archiveParser) discoverPkgsFromPomProperties(parentPkg *pkg.Package) ([
// discovered props = new package
p := pkg.Package{
Name: propsObj.ArtifactID,
Version: propsObj.Version,
Language: pkg.Java,
Type: pkg.JavaPkg,
Name: propsObj.ArtifactID,
Version: propsObj.Version,
Language: pkg.Java,
Type: pkg.JavaPkg,
MetadataType: pkg.JavaMetadataType,
Metadata: pkg.JavaMetadata{
PomProperties: propsObj,
Parent: parentPkg,

View file

@ -38,12 +38,13 @@ func parsePackageJSON(_ string, reader io.Reader) ([]pkg.Package, error) {
}
packages = append(packages, pkg.Package{
Name: p.Name,
Version: p.Version,
Licenses: []string{p.License},
Language: pkg.JavaScript,
Type: pkg.NpmPkg,
Metadata: pkg.NpmMetadata{
Name: p.Name,
Version: p.Version,
Licenses: []string{p.License},
Language: pkg.JavaScript,
Type: pkg.NpmPkg,
MetadataType: pkg.NpmPackageJsonMetadataType,
Metadata: pkg.NpmPackageJsonMetadata{
Author: p.Author,
Homepage: p.Homepage,
},

View file

@ -15,7 +15,7 @@ func TestParsePackageJSON(t *testing.T) {
Type: pkg.NpmPkg,
Licenses: []string{"Artistic-2.0"},
Language: pkg.JavaScript,
Metadata: pkg.NpmMetadata{
Metadata: pkg.NpmPackageJsonMetadata{
Author: "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)",
Homepage: "https://docs.npmjs.com/",
},

View file

@ -7,16 +7,6 @@ import (
"github.com/anchore/syft/syft/cataloger/common"
)
// NewPythonPackageCataloger returns a new cataloger for python packages within egg or wheel installation directories.
func NewPythonPackageCataloger() *common.GenericCataloger {
globParsers := map[string]common.ParserFn{
"**/*egg-info/PKG-INFO": parseWheelOrEggMetadata,
"**/*dist-info/METADATA": parseWheelOrEggMetadata,
}
return common.NewGenericCataloger(nil, globParsers, "python-package-cataloger")
}
// NewPythonIndexCataloger returns a new cataloger for python packages referenced from poetry lock files, requirements.txt files, and setup.py files.
func NewPythonIndexCataloger() *common.GenericCataloger {
globParsers := map[string]common.ParserFn{

View file

@ -0,0 +1,64 @@
package python
import (
"fmt"
"path/filepath"
"github.com/anchore/stereoscope/pkg/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/scope"
)
// wheelGlob is the glob pattern handed to the resolver to locate METADATA files that live in a
// directory whose name ends in "dist-info".
const wheelGlob = "**/*dist-info/METADATA"
// PackageCataloger catalogs python packages by searching a resolver for installed package metadata files.
type PackageCataloger struct {
// NOTE(review): globs is never read or assigned by the code visible in this file — confirm whether
// it is still needed or is a placeholder for configurable glob patterns.
globs []string
}
// NewPythonPackageCataloger builds a cataloger for python packages within egg or wheel installation directories.
//
// NOTE(review): an earlier glob-to-parser table ("**/*egg-info/PKG-INFO" and "**/*dist-info/METADATA",
// both mapped to parseWheelOrEggMetadata) used to be set up here and is no longer wired in; the returned
// cataloger is a zero-value PackageCataloger. Confirm whether egg-info discovery is meant to come back.
func NewPythonPackageCataloger() *PackageCataloger {
	cataloger := new(PackageCataloger)
	return cataloger
}
// Name reports the fixed identifier of this cataloger.
func (c *PackageCataloger) Name() string {
	const catalogerName = "python-package-cataloger"
	return catalogerName
}
// Catalog discovers python packages through the given resolver. The work is delegated entirely to
// catalogWheels, whose packages and error are returned unchanged.
func (c *PackageCataloger) Catalog(resolver scope.Resolver) ([]pkg.Package, error) {
	wheelPkgs, err := c.catalogWheels(resolver)
	return wheelPkgs, err
}
// catalogWheels asks the resolver for every file matching wheelGlob and catalogs each match with
// catalogWheel, returning the resulting packages in match order.
//
// Processing stops at the first failure: if the glob lookup or any single wheel fails, a wrapped error
// is returned and no packages are returned.
func (c *PackageCataloger) catalogWheels(resolver scope.Resolver) ([]pkg.Package, error) {
	fileMatches, err := resolver.FilesByGlob(wheelGlob)
	if err != nil {
		// wrap the resolver's error so the root cause is not lost to the caller
		return nil, fmt.Errorf("failed to find files by glob: %s: %w", wheelGlob, err)
	}

	var pkgs []pkg.Package
	for _, ref := range fileMatches {
		p, err := c.catalogWheel(resolver, ref)
		if err != nil {
			return nil, fmt.Errorf("unable to catalog python wheel=%+v: %w", ref.Path, err)
		}
		pkgs = append(pkgs, p)
	}
	return pkgs, nil
}
// catalogWheel is meant to turn a single wheel METADATA file reference into a package.
//
// NOTE(review): as it stands the body is unfinished — it derives the sibling RECORD path and looks it up
// through the resolver, but discoveredPaths and err are never used and there is no return statement, so
// this will not compile until the remaining logic is added.
func (c *PackageCataloger) catalogWheel(resolver scope.Resolver, wheelRef file.Reference) (pkg.Package, error) {
// we've been given a file reference to a specific wheel METADATA file. note: this may be for a directory
// or for an image... for an image the METADATA file may be present within multiple layers, so it is important
// to reconcile the RECORD path to the same layer (or the next adjacent lower layer).
// the RECORD file is expected to sit in the same directory as the METADATA file
recordPath := filepath.Join(filepath.Dir(string(wheelRef.Path)), "RECORD")
// problem! we don't know which is the right discovered path relative to the given METADATA file! (which layer?)
discoveredPaths, err := resolver.FilesByPath(file.Path(recordPath))
}

View file

@ -0,0 +1,73 @@
package python
import (
"os"
"testing"
"github.com/anchore/syft/syft/pkg"
"github.com/go-test/deep"
)
// TestPythonPackageCataloger is a table-driven test describing the packages (including their
// EggWheelMetadata) expected to be found under a fixture location.
//
// NOTE(review): this test looks unfinished — it opens the fixture path "test-fixtures/" (a directory
// path, not a metadata file), feeds it to parseWheelOrEggMetadata rather than to the package cataloger,
// and then compares that result against a pointer to a []pkg.Package. Confirm the intended entry point
// and expected type before relying on it.
func TestPythonPackageCataloger(t *testing.T) {
tests := []struct {
Fixture string
// despite the name, this holds the full expected packages, not just their metadata
ExpectedMetadata []pkg.Package
}{
{
Fixture: "test-fixtures/",
ExpectedMetadata: []pkg.Package{
{
Name: "requests",
Version: "2.22.0",
Type: pkg.PythonPkg,
Language: pkg.Python,
Licenses: []string{"Apache 2.0"},
MetadataType: pkg.PythonEggWheelMetadataType,
Metadata: pkg.EggWheelMetadata{
Name: "requests",
Version: "2.22.0",
License: "Apache 2.0",
Platform: "UNKNOWN",
Author: "Kenneth Reitz",
AuthorEmail: "me@kennethreitz.org",
},
},
{
Name: "Pygments",
Version: "2.6.1",
Type: pkg.PythonPkg,
Language: pkg.Python,
Licenses: []string{"BSD License"},
MetadataType: pkg.PythonEggWheelMetadataType,
Metadata: pkg.EggWheelMetadata{
Name: "Pygments",
Version: "2.6.1",
License: "BSD License",
Platform: "any",
Author: "Georg Brandl",
AuthorEmail: "georg@python.org",
},
},
},
},
}
for _, test := range tests {
t.Run(test.Fixture, func(t *testing.T) {
// NOTE(review): the opened fixture is never closed
fixture, err := os.Open(test.Fixture)
if err != nil {
t.Fatalf("failed to open fixture: %+v", err)
}
actual, err := parseWheelOrEggMetadata(fixture.Name(), fixture)
if err != nil {
t.Fatalf("failed to parse python package: %+v", err)
}
// every difference reported by deep.Equal is surfaced as its own test error
for _, d := range deep.Equal(actual, &test.ExpectedMetadata) {
t.Errorf("diff: %+v", d)
}
})
}
}

View file

@ -4,9 +4,32 @@ import (
"os"
"testing"
"github.com/go-test/deep"
"github.com/anchore/syft/syft/pkg"
)
// assertPackagesEqual checks that actual contains exactly the packages described by expected, which is
// keyed by package name.
//
// A count mismatch logs every actual package and aborts the test. Otherwise each actual package is
// looked up by name and deep-compared against its expectation; every difference is reported as a
// separate test error. A package with no expectation is reported once and not compared further.
func assertPackagesEqual(t *testing.T, actual []pkg.Package, expected map[string]pkg.Package) {
	t.Helper()
	if len(actual) != len(expected) {
		for _, a := range actual {
			t.Log(" ", a)
		}
		t.Fatalf("unexpected package count: %d!=%d", len(actual), len(expected))
	}

	for _, a := range actual {
		expectedPkg, ok := expected[a.Name]
		if !ok {
			t.Errorf("unexpected package found: '%s'", a.Name)
			// there is nothing to compare against; diffing against the zero-value package would
			// only bury the real failure under spurious field diffs
			continue
		}
		for _, d := range deep.Equal(a, expectedPkg) {
			t.Errorf("diff: %+v", d)
		}
	}
}
func TestParseRequirementsTxt(t *testing.T) {
expected := map[string]pkg.Package{
"foo": {
@ -34,6 +57,6 @@ func TestParseRequirementsTxt(t *testing.T) {
t.Fatalf("failed to parse requirements: %+v", err)
}
assertPkgsEqual(t, actual, expected)
assertPackagesEqual(t, actual, expected)
}

View file

@ -55,6 +55,6 @@ func TestParseSetup(t *testing.T) {
t.Fatalf("failed to parse requirements: %+v", err)
}
assertPkgsEqual(t, actual, expected)
assertPackagesEqual(t, actual, expected)
}

View file

@ -6,16 +6,14 @@ import (
"io"
"strings"
"github.com/anchore/syft/syft/cataloger/common"
"github.com/mitchellh/mapstructure"
"github.com/anchore/syft/syft/pkg"
)
// integrity check
var _ common.ParserFn = parseWheelOrEggMetadata
// parseWheelOrEggMetadata takes a Python Egg or Wheel (which share the same format and values for our purposes),
// returning all Python packages listed.
func parseWheelOrEggMetadata(_ string, reader io.Reader) ([]pkg.Package, error) {
func parseWheelOrEggMetadata(_ string, reader io.Reader) (*pkg.EggWheelMetadata, error) {
fields := make(map[string]string)
var key string
@ -64,16 +62,11 @@ func parseWheelOrEggMetadata(_ string, reader io.Reader) ([]pkg.Package, error)
return nil, fmt.Errorf("failed to parse python wheel/egg: %w", err)
}
p := pkg.Package{
Name: fields["Name"],
Version: fields["Version"],
Language: pkg.Python,
Type: pkg.PythonPkg,
var metadata pkg.EggWheelMetadata
if err := mapstructure.Decode(fields, &metadata); err != nil {
return nil, fmt.Errorf("unable to parse APK metadata: %w", err)
}
if license, ok := fields["License"]; ok && license != "" {
p.Licenses = []string{license}
}
return []pkg.Package{p}, nil
return &metadata, nil
}

View file

@ -5,89 +5,54 @@ import (
"testing"
"github.com/anchore/syft/syft/pkg"
"github.com/go-test/deep"
)
func assertPkgsEqual(t *testing.T, actual []pkg.Package, expected map[string]pkg.Package) {
t.Helper()
if len(actual) != len(expected) {
for _, a := range actual {
t.Log(" ", a)
}
t.Fatalf("unexpected package count: %d!=%d", len(actual), len(expected))
}
for _, a := range actual {
expectedPkg, ok := expected[a.Name]
if !ok {
t.Errorf("unexpected package found: '%s'", a.Name)
}
if expectedPkg.Version != a.Version {
t.Errorf("unexpected package version: '%s'", a.Version)
}
if a.Language != expectedPkg.Language {
t.Errorf("bad language: '%+v'", a.Language)
}
if a.Type != expectedPkg.Type {
t.Errorf("bad package type: %+v", a.Type)
}
if len(a.Licenses) < len(expectedPkg.Licenses) {
t.Errorf("bad package licenses count: '%+v'", a.Licenses)
}
if len(a.Licenses) > 0 {
if a.Licenses[0] != expectedPkg.Licenses[0] {
t.Errorf("bad package licenses: '%+v'", a.Licenses)
}
}
}
}
func TestParseEggMetadata(t *testing.T) {
expected := map[string]pkg.Package{
"requests": {
Name: "requests",
Version: "2.22.0",
Language: pkg.Python,
Type: pkg.PythonPkg,
Licenses: []string{"Apache 2.0"},
tests := []struct {
Fixture string
ExpectedMetadata pkg.EggWheelMetadata
}{
{
Fixture: "test-fixtures/egg-info/PKG-INFO",
ExpectedMetadata: pkg.EggWheelMetadata{
Name: "requests",
Version: "2.22.0",
License: "Apache 2.0",
Platform: "UNKNOWN",
Author: "Kenneth Reitz",
AuthorEmail: "me@kennethreitz.org",
},
},
{
Fixture: "test-fixtures/dist-info/METADATA",
ExpectedMetadata: pkg.EggWheelMetadata{
Name: "Pygments",
Version: "2.6.1",
License: "BSD License",
Platform: "any",
Author: "Georg Brandl",
AuthorEmail: "georg@python.org",
},
},
}
fixture, err := os.Open("test-fixtures/egg-info/PKG-INFO")
if err != nil {
t.Fatalf("failed to open fixture: %+v", err)
for _, test := range tests {
t.Run(test.Fixture, func(t *testing.T) {
fixture, err := os.Open(test.Fixture)
if err != nil {
t.Fatalf("failed to open fixture: %+v", err)
}
actual, err := parseWheelOrEggMetadata(fixture.Name(), fixture)
if err != nil {
t.Fatalf("failed to parse egg-info: %+v", err)
}
for _, d := range deep.Equal(actual, &test.ExpectedMetadata) {
t.Errorf("diff: %+v", d)
}
})
}
actual, err := parseWheelOrEggMetadata(fixture.Name(), fixture)
if err != nil {
t.Fatalf("failed to parse egg-info: %+v", err)
}
assertPkgsEqual(t, actual, expected)
}
func TestParseWheelMetadata(t *testing.T) {
expected := map[string]pkg.Package{
"Pygments": {
Name: "Pygments",
Version: "2.6.1",
Language: pkg.Python,
Type: pkg.PythonPkg,
Licenses: []string{"BSD License"},
},
}
fixture, err := os.Open("test-fixtures/dist-info/METADATA")
if err != nil {
t.Fatalf("failed to open fixture: %+v", err)
}
actual, err := parseWheelOrEggMetadata(fixture.Name(), fixture)
if err != nil {
t.Fatalf("failed to parse dist-info: %+v", err)
}
assertPkgsEqual(t, actual, expected)
}

View file

@ -52,8 +52,9 @@ func parseRpmDB(_ string, reader io.Reader) ([]pkg.Package, error) {
Name: entry.Name,
Version: fmt.Sprintf("%s-%s", entry.Version, entry.Release), // this is what engine does
//Version: fmt.Sprintf("%d:%s-%s.%s", entry.Epoch, entry.Version, entry.Release, entry.Arch),
Type: pkg.RpmPkg,
Metadata: pkg.RpmMetadata{
Type: pkg.RpmPkg,
MetadataType: pkg.RpmdbMetadataType,
Metadata: pkg.RpmdbMetadata{
Name: entry.Name,
Version: entry.Version,
Epoch: entry.Epoch,

View file

@ -1,10 +1,11 @@
package rpmdb
import (
"github.com/anchore/syft/syft/pkg"
"github.com/go-test/deep"
"os"
"testing"
"github.com/anchore/syft/syft/pkg"
"github.com/go-test/deep"
)
func TestParseRpmDB(t *testing.T) {
@ -13,7 +14,7 @@ func TestParseRpmDB(t *testing.T) {
Name: "dive",
Version: "0.9.2-1",
Type: pkg.RpmPkg,
Metadata: pkg.RpmMetadata{
Metadata: pkg.RpmdbMetadata{
Name: "dive",
Epoch: 0,
Arch: "x86_64",

View file

@ -96,12 +96,13 @@ func parseGemSpecEntries(_ string, reader io.Reader) ([]pkg.Package, error) {
}
pkgs = append(pkgs, pkg.Package{
Name: metadata.Name,
Version: metadata.Version,
Licenses: metadata.Licenses,
Language: pkg.Ruby,
Type: pkg.GemPkg,
Metadata: metadata,
Name: metadata.Name,
Version: metadata.Version,
Licenses: metadata.Licenses,
Language: pkg.Ruby,
Type: pkg.GemPkg,
MetadataType: pkg.GemMetadataType,
Metadata: metadata,
})
}

View file

@ -0,0 +1,11 @@
package pkg
// EggWheelMetadata represents all captured data for a python egg or wheel package.
//
// The `mapstructure` tags name the header keys of the parsed PKG-INFO / METADATA file that populate
// each field. The tag key must be spelled `mapstructure`: with any other key the decoder falls back to
// matching on the Go field name, which cannot match the hyphenated "Author-email" header.
type EggWheelMetadata struct {
	Name        string `json:"name" mapstructure:"Name"`
	Version     string `json:"version" mapstructure:"Version"`
	License     string `json:"license" mapstructure:"License"`
	Author      string `json:"author" mapstructure:"Author"`
	AuthorEmail string `json:"authorEmail" mapstructure:"Author-email"`
	Platform    string `json:"platform" mapstructure:"Platform"`
}

14
syft/pkg/metadata.go Normal file
View file

@ -0,0 +1,14 @@
package pkg
// MetadataType names the shape of the additional data carried in a package's "metadata" field.
type MetadataType string

const (
	// UnknownMetadataType is presumably the fallback when the metadata shape is not known — TODO confirm.
	UnknownMetadataType MetadataType = "UnknownMetadata"

	// OS package database shapes.

	// ApkMetadataType marks metadata parsed from an APK DB entry.
	ApkMetadataType MetadataType = "apk-metadata"
	// DpkgMetadataType marks metadata parsed from a dpkg status entry.
	DpkgMetadataType MetadataType = "dpkg-metadata"
	// RpmdbMetadataType marks metadata parsed from an RPM DB entry (RpmdbMetadata).
	RpmdbMetadataType MetadataType = "rpmdb-metadata"

	// Language ecosystem shapes.

	// GemMetadataType marks metadata parsed from a gem specification.
	GemMetadataType MetadataType = "gem-metadata"
	// JavaMetadataType marks metadata discovered in a java archive (JavaMetadata).
	JavaMetadataType MetadataType = "java-metadata"
	// NpmPackageJsonMetadataType marks metadata parsed from a package.json file (NpmPackageJsonMetadata).
	NpmPackageJsonMetadataType MetadataType = "npm-package-json-metadata"
	// PythonEggWheelMetadataType marks metadata parsed from a python egg or wheel (EggWheelMetadata).
	PythonEggWheelMetadataType MetadataType = "python-egg-wheel-metadata"
)

View file

@ -1,7 +1,7 @@
package pkg
// NpmMetadata holds extra information that is used in pkg.Package
type NpmMetadata struct {
// NpmPackageJsonMetadata holds extra information that is used in pkg.Package
type NpmPackageJsonMetadata struct {
Name string `mapstructure:"name" json:"name"`
Version string `mapstructure:"version" json:"version"`
Files []string `mapstructure:"files" json:"files"`

View file

@ -23,10 +23,11 @@ type Package struct {
FoundBy string `json:"foundBy"` // the specific cataloger that discovered this package
Source []file.Reference `json:"sources"` // the locations that lead to the discovery of this package (note: this is not necessarily the locations that make up this package)
// TODO: should we move licenses into metadata?
Licenses []string `json:"licenses"` // licenses discovered with the package metadata
Language Language `json:"language"` // the language ecosystem this package belongs to (e.g. JavaScript, Python, etc)
Type Type `json:"type"` // the package type (e.g. Npm, Yarn, Egg, Wheel, Rpm, Deb, etc)
Metadata interface{} `json:"metadata,omitempty"` // additional data found while parsing the package source
Licenses []string `json:"licenses"` // licenses discovered with the package metadata
Language Language `json:"language"` // the language ecosystem this package belongs to (e.g. JavaScript, Python, etc)
Type Type `json:"type"` // the package type (e.g. Npm, Yarn, Python, Rpm, Deb, etc)
MetadataType MetadataType `json:"metadataType"` // the shape of the additional data in the "metadata" field
Metadata interface{} `json:"metadata,omitempty"` // additional data found while parsing the package source
}
// ID returns the package ID, which is unique relative to a package catalog.

View file

@ -93,7 +93,7 @@ func TestPackage_pURL(t *testing.T) {
Name: "bad-name",
Version: "bad-v0.1.0",
Type: RpmPkg,
Metadata: RpmMetadata{
Metadata: RpmdbMetadata{
Name: "name",
Version: "v0.1.0",
Epoch: 2,

View file

@ -7,8 +7,8 @@ import (
"github.com/package-url/packageurl-go"
)
// RpmMetadata represents all captured data for a RPM DB package entry.
type RpmMetadata struct {
// RpmdbMetadata represents all captured data for a RPM DB package entry.
type RpmdbMetadata struct {
Name string `json:"name"`
Version string `json:"version"`
Epoch int `json:"epoch"`
@ -20,7 +20,7 @@ type RpmMetadata struct {
Vendor string `json:"vendor"`
}
func (m RpmMetadata) PackageURL(d distro.Distro) string {
func (m RpmdbMetadata) PackageURL(d distro.Distro) string {
pURL := packageurl.NewPackageURL(
packageurl.TypeRPM,
d.Type.String(),

View file

@ -1,22 +1,23 @@
package pkg
import (
"testing"
"github.com/anchore/syft/syft/distro"
"github.com/sergi/go-diff/diffmatchpatch"
"testing"
)
func TestRpmMetadata_pURL(t *testing.T) {
tests := []struct {
distro distro.Distro
metadata RpmMetadata
metadata RpmdbMetadata
expected string
}{
{
distro: distro.Distro{
Type: distro.CentOS,
},
metadata: RpmMetadata{
metadata: RpmdbMetadata{
Name: "p",
Version: "v",
Arch: "a",
@ -29,7 +30,7 @@ func TestRpmMetadata_pURL(t *testing.T) {
distro: distro.Distro{
Type: distro.RedHat,
},
metadata: RpmMetadata{
metadata: RpmdbMetadata{
Name: "p",
Version: "v",
Arch: "a",

View file

@ -3,11 +3,12 @@ package cyclonedx
import (
"bytes"
"flag"
"github.com/anchore/stereoscope/pkg/imagetest"
"github.com/anchore/syft/syft/distro"
"regexp"
"testing"
"github.com/anchore/stereoscope/pkg/imagetest"
"github.com/anchore/syft/syft/distro"
"github.com/anchore/go-testutils"
"github.com/anchore/stereoscope/pkg/file"
"github.com/anchore/syft/syft/pkg"
@ -109,7 +110,7 @@ func TestCycloneDxImgsPresenter(t *testing.T) {
},
Type: pkg.RpmPkg,
FoundBy: "the-cataloger-1",
Metadata: pkg.RpmMetadata{
Metadata: pkg.RpmdbMetadata{
Name: "package1",
Epoch: 0,
Arch: "x86_64",
@ -133,7 +134,7 @@ func TestCycloneDxImgsPresenter(t *testing.T) {
"MIT",
"Apache-v2",
},
Metadata: pkg.RpmMetadata{
Metadata: pkg.RpmdbMetadata{
Name: "package2",
Epoch: 0,
Arch: "x86_64",