From 04d288b3643f906255af88108f27712bb2be5b63 Mon Sep 17 00:00:00 2001 From: Christopher Angelo Phillips <32073428+spiffcs@users.noreply.github.com> Date: Mon, 19 Sep 2022 16:08:02 -0400 Subject: [PATCH] feat: catalog python files for installed-files.txt file metadata (#1217) Co-authored-by: houdini91 --- .../pkg/cataloger/python/package_cataloger.go | 41 ++++++++++++++++++- .../python/parse_wheel_egg_record.go | 33 +++++++++++++++ .../python/parse_wheel_egg_record_test.go | 39 +++++++++++++++++- .../installed-files/installed-files.txt | 6 +++ 4 files changed, 117 insertions(+), 2 deletions(-) create mode 100644 syft/pkg/cataloger/python/test-fixtures/installed-files/installed-files.txt diff --git a/syft/pkg/cataloger/python/package_cataloger.go b/syft/pkg/cataloger/python/package_cataloger.go index e27610fd4..bdcb9c23a 100644 --- a/syft/pkg/cataloger/python/package_cataloger.go +++ b/syft/pkg/cataloger/python/package_cataloger.go @@ -8,6 +8,7 @@ import ( "path/filepath" "github.com/anchore/syft/internal" + "github.com/anchore/syft/internal/log" "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/source" @@ -91,13 +92,44 @@ func (c *PackageCataloger) catalogEggOrWheel(resolver source.FileResolver, metad return p, nil } +// fetchInstalledFiles finds a corresponding installed-files.txt file for the given python package metadata file and returns the set of file records contained. +func (c *PackageCataloger) fetchInstalledFiles(resolver source.FileResolver, metadataLocation source.Location, sitePackagesRootPath string) (files []pkg.PythonFileRecord, sources []source.Location, err error) { + // we've been given a file reference to a specific wheel METADATA file. note: this may be for a directory + // or for an image... for an image the METADATA file may be present within multiple layers, so it is important + // to reconcile the installed-files.txt path to the same layer (or the next adjacent lower layer). 
+ + // find the installed-files.txt file relative to the directory where the METADATA file resides (in path AND layer structure) + installedFilesPath := filepath.Join(filepath.Dir(metadataLocation.RealPath), "installed-files.txt") + installedFilesRef := resolver.RelativeFileByPath(metadataLocation, installedFilesPath) + + if installedFilesRef != nil { + sources = append(sources, *installedFilesRef) + + installedFilesContents, err := resolver.FileContentsByLocation(*installedFilesRef) + if err != nil { + return nil, nil, err + } + defer internal.CloseAndLogError(installedFilesContents, installedFilesPath) + + // parse the installed-files contents + installedFiles, err := parseInstalledFiles(installedFilesContents, metadataLocation.RealPath, sitePackagesRootPath) + if err != nil { + log.Warnf("unable to parse installed-files.txt for python package=%+v: %w", metadataLocation.RealPath, err) + return files, sources, nil + } + + files = append(files, installedFiles...) + } + return files, sources, nil +} + // fetchRecordFiles finds a corresponding RECORD file for the given python package metadata file and returns the set of file records contained. func (c *PackageCataloger) fetchRecordFiles(resolver source.FileResolver, metadataLocation source.Location) (files []pkg.PythonFileRecord, sources []source.Location, err error) { // we've been given a file reference to a specific wheel METADATA file. note: this may be for a directory // or for an image... for an image the METADATA file may be present within multiple layers, so it is important // to reconcile the RECORD path to the same layer (or the next adjacent lower layer). 
- // lets find the RECORD file relative to the directory where the METADATA file resides (in path AND layer structure) + // find the RECORD file relative to the directory where the METADATA file resides (in path AND layer structure) recordPath := filepath.Join(filepath.Dir(metadataLocation.RealPath), "RECORD") recordRef := resolver.RelativeFileByPath(metadataLocation, recordPath) @@ -206,6 +238,13 @@ func (c *PackageCataloger) assembleEggOrWheelMetadata(resolver source.FileResolv if err != nil { return nil, nil, err } + if len(r) == 0 { + r, s, err = c.fetchInstalledFiles(resolver, metadataLocation, metadata.SitePackagesRootPath) + if err != nil { + return nil, nil, err + } + } + sources = append(sources, s...) metadata.Files = r diff --git a/syft/pkg/cataloger/python/parse_wheel_egg_record.go b/syft/pkg/cataloger/python/parse_wheel_egg_record.go index 764b6bf08..8b3fb3987 100644 --- a/syft/pkg/cataloger/python/parse_wheel_egg_record.go +++ b/syft/pkg/cataloger/python/parse_wheel_egg_record.go @@ -1,9 +1,11 @@ package python import ( + "bufio" "encoding/csv" "fmt" "io" + "path/filepath" "strings" "github.com/anchore/syft/internal/log" @@ -59,3 +61,34 @@ func parseWheelOrEggRecord(reader io.Reader) ([]pkg.PythonFileRecord, error) { return records, nil } + +func parseInstalledFiles(reader io.Reader, location, sitePackagesRootPath string) ([]pkg.PythonFileRecord, error) { + var installedFiles []pkg.PythonFileRecord + r := bufio.NewReader(reader) + + for { + line, err := r.ReadString('\n') + if err == io.EOF { + break + } + if err != nil { + return nil, fmt.Errorf("unable to read python installed-files file: %w", err) + } + + if location != "" && sitePackagesRootPath != "" { + joinedPath := filepath.Join(filepath.Dir(location), line) + line, err = filepath.Rel(sitePackagesRootPath, joinedPath) + if err != nil { + return nil, err + } + } + + installedFile := pkg.PythonFileRecord{ + Path: strings.ReplaceAll(line, "\n", ""), + } + + installedFiles = append(installedFiles, 
installedFile) + } + + return installedFiles, nil +} diff --git a/syft/pkg/cataloger/python/parse_wheel_egg_record_test.go b/syft/pkg/cataloger/python/parse_wheel_egg_record_test.go index 5090d4bd3..f7da09179 100644 --- a/syft/pkg/cataloger/python/parse_wheel_egg_record_test.go +++ b/syft/pkg/cataloger/python/parse_wheel_egg_record_test.go @@ -55,5 +55,42 @@ func TestParseWheelEggRecord(t *testing.T) { } }) } - +} + +func TestParseInstalledFiles(t *testing.T) { + tests := []struct { + Fixture string + ExpectedMetadata []pkg.PythonFileRecord + }{ + { + Fixture: "test-fixtures/installed-files/installed-files.txt", + ExpectedMetadata: []pkg.PythonFileRecord{ + {Path: "../__pycache__/dicttoxml.cpython-36.pyc"}, + {Path: "../dicttoxml.py"}, + {Path: "PKG-INFO"}, + {Path: "SOURCES.txt"}, + {Path: "dependency_links.txt"}, + {Path: "top_level.txt"}, + }, + }, + } + + for _, test := range tests { + t.Run(test.Fixture, func(t *testing.T) { + fixture, err := os.Open(test.Fixture) + if err != nil { + t.Fatalf("failed to open fixture: %+v", err) + } + + actual, err := parseInstalledFiles(fixture, "", "") + if err != nil { + t.Fatalf("failed to parse: %+v", err) + } + + for _, d := range deep.Equal(actual, test.ExpectedMetadata) { + t.Errorf("diff: %+v", d) + } + + }) + } } diff --git a/syft/pkg/cataloger/python/test-fixtures/installed-files/installed-files.txt b/syft/pkg/cataloger/python/test-fixtures/installed-files/installed-files.txt new file mode 100644 index 000000000..6fcd98620 --- /dev/null +++ b/syft/pkg/cataloger/python/test-fixtures/installed-files/installed-files.txt @@ -0,0 +1,6 @@ +../__pycache__/dicttoxml.cpython-36.pyc +../dicttoxml.py +PKG-INFO +SOURCES.txt +dependency_links.txt +top_level.txt