allow for python metadata fields to be optional

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
This commit is contained in:
Alex Goodman 2020-10-22 13:50:24 -04:00
parent 2e5ff4a995
commit 7d55bca0a0
No known key found for this signature in database
GPG key ID: 5CB45AE22BAB7EA7
6 changed files with 97 additions and 57 deletions

View file

@ -491,6 +491,9 @@
"release": {
"type": "string"
},
"sitePackagesRootPath": {
"type": "string"
},
"size": {
"type": "integer"
},
@ -500,6 +503,12 @@
"sourceRpm": {
"type": "string"
},
"topLevelPackages": {
"items": {
"type": "string"
},
"type": "array"
},
"url": {
"type": "string"
},

View file

@ -25,10 +25,12 @@ func NewPythonPackageCataloger() *PackageCataloger {
return &PackageCataloger{}
}
// Name reports the fixed identifier string for this cataloger.
func (c *PackageCataloger) Name() string {
	const catalogerName = "python-package-cataloger"
	return catalogerName
}
// Catalog is given an object to resolve file references and content, this function returns any discovered Packages after analyzing python egg and wheel installations.
func (c *PackageCataloger) Catalog(resolver scope.Resolver) ([]pkg.Package, error) {
// nolint:prealloc
var fileMatches []file.Reference
@ -54,19 +56,33 @@ func (c *PackageCataloger) Catalog(resolver scope.Resolver) ([]pkg.Package, erro
return pkgs, nil
}
func (c *PackageCataloger) assembleEggOrWheelMetadata(resolver scope.Resolver, metadataRef file.Reference) (*pkg.PythonPackageMetadata, []file.Reference, error) {
var sources = []file.Reference{metadataRef}
metadataContents, err := resolver.FileContentsByRef(metadataRef)
// catalogEggOrWheel takes the primary metadata file reference and returns the python package it represents.
func (c *PackageCataloger) catalogEggOrWheel(resolver scope.Resolver, metadataRef file.Reference) (*pkg.Package, error) {
metadata, sources, err := c.assembleEggOrWheelMetadata(resolver, metadataRef)
if err != nil {
return nil, nil, err
return nil, err
}
metadata, err := parseWheelOrEggMetadata(metadataRef.Path, strings.NewReader(metadataContents))
if err != nil {
return nil, nil, err
var licenses []string
if metadata.License != "" {
licenses = []string{metadata.License}
}
return &pkg.Package{
Name: metadata.Name,
Version: metadata.Version,
FoundBy: c.Name(),
Source: sources,
Licenses: licenses,
Language: pkg.Python,
Type: pkg.PythonPkg,
MetadataType: pkg.PythonPackageMetadataType,
Metadata: *metadata,
}, nil
}
// fetchRecordFiles finds a corresponding RECORD file for the given python package metadata file and returns the set of file records contained.
func (c *PackageCataloger) fetchRecordFiles(resolver scope.Resolver, metadataRef file.Reference) (files []pkg.PythonFileRecord, sources []file.Reference, err error) {
// we've been given a file reference to a specific wheel METADATA file. note: this may be for a directory
// or for an image... for an image the METADATA file may be present within multiple layers, so it is important
// to reconcile the RECORD path to the same layer (or the next adjacent lower layer).
@ -92,10 +108,13 @@ func (c *PackageCataloger) assembleEggOrWheelMetadata(resolver scope.Resolver, m
return nil, nil, err
}
// append the record files list to the metadata
metadata.Files = records
files = append(files, records...)
}
return files, sources, nil
}
// fetchTopLevelPackages finds a corresponding top_level.txt file for the given python package metadata file and returns the set of package names contained.
func (c *PackageCataloger) fetchTopLevelPackages(resolver scope.Resolver, metadataRef file.Reference) (pkgs []string, sources []file.Reference, err error) {
// a top_level.txt file specifies the python top-level packages (provided by this python package) installed into site-packages
parentDir := filepath.Dir(string(metadataRef.Path))
topLevelPath := filepath.Join(parentDir, "top_level.txt")
@ -107,47 +126,54 @@ func (c *PackageCataloger) assembleEggOrWheelMetadata(resolver scope.Resolver, m
return nil, nil, fmt.Errorf("missing python package top_level.txt (package=%q)", string(metadataRef.Path))
}
sources = append(sources, *topLevelRef)
topLevelContents, err := resolver.FileContentsByRef(*topLevelRef)
if err != nil {
return nil, nil, err
}
// nolint:prealloc
var topLevelPackages []string
scanner := bufio.NewScanner(strings.NewReader(topLevelContents))
for scanner.Scan() {
topLevelPackages = append(topLevelPackages, scanner.Text())
pkgs = append(pkgs, scanner.Text())
}
if err := scanner.Err(); err != nil {
return nil, nil, fmt.Errorf("could not read python package top_level.txt: %w", err)
}
metadata.TopLevelPackages = topLevelPackages
return pkgs, sources, nil
}
// assembleEggOrWheelMetadata builds a single python package metadata object for the given
// egg/wheel metadata file reference. The metadata file is read through the resolver and parsed,
// then enriched with the file records returned by fetchRecordFiles and the top-level package
// names returned by fetchTopLevelPackages.
//
// The returned reference list names every file the metadata was derived from: the metadata file
// itself first, followed by the sources reported by each helper (in that order). The first error
// encountered aborts assembly and is returned as-is.
func (c *PackageCataloger) assembleEggOrWheelMetadata(resolver scope.Resolver, metadataRef file.Reference) (*pkg.PythonPackageMetadata, []file.Reference, error) {
	contents, err := resolver.FileContentsByRef(metadataRef)
	if err != nil {
		return nil, nil, err
	}

	parsed, err := parseWheelOrEggMetadata(metadataRef.Path, strings.NewReader(contents))
	if err != nil {
		return nil, nil, err
	}

	// the primary metadata file is always the first recorded source
	derivedFrom := []file.Reference{metadataRef}

	// attach any python files found for the given wheel/egg installation
	records, recordSources, err := c.fetchRecordFiles(resolver, metadataRef)
	if err != nil {
		return nil, nil, err
	}
	parsed.Files = records
	derivedFrom = append(derivedFrom, recordSources...)

	// attach any top-level package names found for the given wheel/egg installation
	topLevel, topLevelSources, err := c.fetchTopLevelPackages(resolver, metadataRef)
	if err != nil {
		return nil, nil, err
	}
	parsed.TopLevelPackages = topLevel
	derivedFrom = append(derivedFrom, topLevelSources...)

	return &parsed, derivedFrom, nil
}
// catalogEggOrWheel assembles the metadata for the given egg/wheel metadata file reference and
// wraps it in a python package. The package name, version and metadata come from the assembled
// metadata, and the package sources are the files that metadata was derived from. Licenses is
// populated only when the metadata carries a non-empty license value.
func (c *PackageCataloger) catalogEggOrWheel(resolver scope.Resolver, metadataRef file.Reference) (*pkg.Package, error) {
	metadata, sources, err := c.assembleEggOrWheelMetadata(resolver, metadataRef)
	if err != nil {
		return nil, err
	}

	result := pkg.Package{
		Name:         metadata.Name,
		Version:      metadata.Version,
		FoundBy:      c.Name(),
		Source:       sources,
		Language:     pkg.Python,
		Type:         pkg.PythonPkg,
		MetadataType: pkg.PythonPackageMetadataType,
		Metadata:     *metadata,
	}

	// leave Licenses unset (nil) when the metadata declares no license
	if metadata.License != "" {
		result.Licenses = []string{metadata.License}
	}

	return &result, nil
}

View file

@ -142,12 +142,12 @@ func TestPythonPackageWheelCataloger(t *testing.T) {
AuthorEmail: "me@kennethreitz.org",
SitePackagesRootPath: "test-fixtures",
Files: []pkg.PythonFileRecord{
{Path: "requests-2.22.0.dist-info/INSTALLER", Digest: pkg.Digest{"sha256", "zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg"}, Size: "4"},
{Path: "requests/__init__.py", Digest: pkg.Digest{"sha256", "PnKCgjcTq44LaAMzB-7--B2FdewRrE8F_vjZeaG9NhA"}, Size: "3921"},
{Path: "requests-2.22.0.dist-info/INSTALLER", Digest: &pkg.Digest{"sha256", "zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg"}, Size: "4"},
{Path: "requests/__init__.py", Digest: &pkg.Digest{"sha256", "PnKCgjcTq44LaAMzB-7--B2FdewRrE8F_vjZeaG9NhA"}, Size: "3921"},
{Path: "requests/__pycache__/__version__.cpython-38.pyc"},
{Path: "requests/__pycache__/utils.cpython-38.pyc"},
{Path: "requests/__version__.py", Digest: pkg.Digest{"sha256", "Bm-GFstQaFezsFlnmEMrJDe8JNROz9n2XXYtODdvjjc"}, Size: "436"},
{Path: "requests/utils.py", Digest: pkg.Digest{"sha256", "LtPJ1db6mJff2TJSJWKi7rBpzjPS3mSOrjC9zRhoD3A"}, Size: "30049"},
{Path: "requests/__version__.py", Digest: &pkg.Digest{"sha256", "Bm-GFstQaFezsFlnmEMrJDe8JNROz9n2XXYtODdvjjc"}, Size: "436"},
{Path: "requests/utils.py", Digest: &pkg.Digest{"sha256", "LtPJ1db6mJff2TJSJWKi7rBpzjPS3mSOrjC9zRhoD3A"}, Size: "30049"},
},
TopLevelPackages: []string{"requests"},
},
@ -174,11 +174,11 @@ func TestPythonPackageWheelCataloger(t *testing.T) {
AuthorEmail: "georg@python.org",
SitePackagesRootPath: "test-fixtures",
Files: []pkg.PythonFileRecord{
{Path: "../../../bin/pygmentize", Digest: pkg.Digest{"sha256", "dDhv_U2jiCpmFQwIRHpFRLAHUO4R1jIJPEvT_QYTFp8"}, Size: "220"},
{Path: "Pygments-2.6.1.dist-info/AUTHORS", Digest: pkg.Digest{"sha256", "PVpa2_Oku6BGuiUvutvuPnWGpzxqFy2I8-NIrqCvqUY"}, Size: "8449"},
{Path: "../../../bin/pygmentize", Digest: &pkg.Digest{"sha256", "dDhv_U2jiCpmFQwIRHpFRLAHUO4R1jIJPEvT_QYTFp8"}, Size: "220"},
{Path: "Pygments-2.6.1.dist-info/AUTHORS", Digest: &pkg.Digest{"sha256", "PVpa2_Oku6BGuiUvutvuPnWGpzxqFy2I8-NIrqCvqUY"}, Size: "8449"},
{Path: "Pygments-2.6.1.dist-info/RECORD"},
{Path: "pygments/__pycache__/__init__.cpython-38.pyc"},
{Path: "pygments/util.py", Digest: pkg.Digest{"sha256", "586xXHiJGGZxqk5PMBu3vBhE68DLuAe5MBARWrSPGxA"}, Size: "10778"},
{Path: "pygments/util.py", Digest: &pkg.Digest{"sha256", "586xXHiJGGZxqk5PMBu3vBhE68DLuAe5MBARWrSPGxA"}, Size: "10778"},
},
TopLevelPackages: []string{"pygments", "something_else"},
},
@ -220,6 +220,11 @@ func TestPythonPackageWheelCataloger(t *testing.T) {
test.ExpectedPackage.Source = append(test.ExpectedPackage.Source, *resolver.recordRef)
}
if resolver.topLevelRef != nil {
test.ExpectedPackage.Source = append(test.ExpectedPackage.Source, *resolver.topLevelRef)
}
// end patching expected values with runtime data...
pyPkgCataloger := NewPythonPackageCataloger()
actual, err := pyPkgCataloger.catalogEggOrWheel(resolver, *resolver.metadataRef)

View file

@ -44,7 +44,7 @@ func parseWheelOrEggRecord(reader io.Reader) ([]pkg.PythonFileRecord, error) {
return nil, fmt.Errorf("unexpected python record digest: %q", item)
}
record.Digest = pkg.Digest{
record.Digest = &pkg.Digest{
Algorithm: fields[0],
Value: fields[1],
}

View file

@ -16,22 +16,22 @@ func TestParseWheelEggRecord(t *testing.T) {
{
Fixture: "test-fixtures/egg-info/RECORD",
ExpectedMetadata: []pkg.PythonFileRecord{
{Path: "requests-2.22.0.dist-info/INSTALLER", Digest: pkg.Digest{"sha256", "zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg"}, Size: "4"},
{Path: "requests/__init__.py", Digest: pkg.Digest{"sha256", "PnKCgjcTq44LaAMzB-7--B2FdewRrE8F_vjZeaG9NhA"}, Size: "3921"},
{Path: "requests-2.22.0.dist-info/INSTALLER", Digest: &pkg.Digest{"sha256", "zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg"}, Size: "4"},
{Path: "requests/__init__.py", Digest: &pkg.Digest{"sha256", "PnKCgjcTq44LaAMzB-7--B2FdewRrE8F_vjZeaG9NhA"}, Size: "3921"},
{Path: "requests/__pycache__/__version__.cpython-38.pyc"},
{Path: "requests/__pycache__/utils.cpython-38.pyc"},
{Path: "requests/__version__.py", Digest: pkg.Digest{"sha256", "Bm-GFstQaFezsFlnmEMrJDe8JNROz9n2XXYtODdvjjc"}, Size: "436"},
{Path: "requests/utils.py", Digest: pkg.Digest{"sha256", "LtPJ1db6mJff2TJSJWKi7rBpzjPS3mSOrjC9zRhoD3A"}, Size: "30049"},
{Path: "requests/__version__.py", Digest: &pkg.Digest{"sha256", "Bm-GFstQaFezsFlnmEMrJDe8JNROz9n2XXYtODdvjjc"}, Size: "436"},
{Path: "requests/utils.py", Digest: &pkg.Digest{"sha256", "LtPJ1db6mJff2TJSJWKi7rBpzjPS3mSOrjC9zRhoD3A"}, Size: "30049"},
},
},
{
Fixture: "test-fixtures/dist-info/RECORD",
ExpectedMetadata: []pkg.PythonFileRecord{
{Path: "../../../bin/pygmentize", Digest: pkg.Digest{"sha256", "dDhv_U2jiCpmFQwIRHpFRLAHUO4R1jIJPEvT_QYTFp8"}, Size: "220"},
{Path: "Pygments-2.6.1.dist-info/AUTHORS", Digest: pkg.Digest{"sha256", "PVpa2_Oku6BGuiUvutvuPnWGpzxqFy2I8-NIrqCvqUY"}, Size: "8449"},
{Path: "../../../bin/pygmentize", Digest: &pkg.Digest{"sha256", "dDhv_U2jiCpmFQwIRHpFRLAHUO4R1jIJPEvT_QYTFp8"}, Size: "220"},
{Path: "Pygments-2.6.1.dist-info/AUTHORS", Digest: &pkg.Digest{"sha256", "PVpa2_Oku6BGuiUvutvuPnWGpzxqFy2I8-NIrqCvqUY"}, Size: "8449"},
{Path: "Pygments-2.6.1.dist-info/RECORD"},
{Path: "pygments/__pycache__/__init__.cpython-38.pyc"},
{Path: "pygments/util.py", Digest: pkg.Digest{"sha256", "586xXHiJGGZxqk5PMBu3vBhE68DLuAe5MBARWrSPGxA"}, Size: "10778"},
{Path: "pygments/util.py", Digest: &pkg.Digest{"sha256", "586xXHiJGGZxqk5PMBu3vBhE68DLuAe5MBARWrSPGxA"}, Size: "10778"},
},
},
}

View file

@ -7,9 +7,9 @@ type Digest struct {
// PythonFileRecord represents a single entry within a RECORD file for a python wheel or egg package
type PythonFileRecord struct {
Path string `json:"path"`
Digest Digest `json:"digest"`
Size string `json:"size"`
Path string `json:"path"`
Digest *Digest `json:"digest,omitempty"`
Size string `json:"size,omitempty"`
}
// PythonPackageMetadata represents all captured data for a python egg or wheel package.