From 7a8a5419b8bacad5b2685e578cbb60ee533b35fe Mon Sep 17 00:00:00 2001 From: Toure Date: Mon, 5 Oct 2020 17:06:03 -0400 Subject: [PATCH 1/8] adding ruby gemspec support. Signed-off-by: Toure Dunnon --- syft/cataloger/bundler/cataloger.go | 3 +- .../bundler/parse_gemfile_lock_test.go | 115 ++++++++-------- syft/cataloger/bundler/parse_gemspec.go | 125 ++++++++++++++++++ syft/cataloger/bundler/parse_gemspec_test.go | 50 +++++++ .../bundler/test-fixtures/bundler.gemspec | 25 ++++ syft/lib.go | 10 ++ syft/pkg/gem_metadata.go | 7 + 7 files changed, 277 insertions(+), 58 deletions(-) create mode 100644 syft/cataloger/bundler/parse_gemspec.go create mode 100644 syft/cataloger/bundler/parse_gemspec_test.go create mode 100644 syft/cataloger/bundler/test-fixtures/bundler.gemspec create mode 100644 syft/pkg/gem_metadata.go diff --git a/syft/cataloger/bundler/cataloger.go b/syft/cataloger/bundler/cataloger.go index 265702659..aabc4ca42 100644 --- a/syft/cataloger/bundler/cataloger.go +++ b/syft/cataloger/bundler/cataloger.go @@ -18,7 +18,8 @@ type Cataloger struct { // New returns a new Bundler cataloger object. func New() *Cataloger { globParsers := map[string]common.ParserFn{ - "**/Gemfile.lock": parseGemfileLockEntries, + "**/Gemfile.lock": parseGemfileLockEntries, // valid in a dir context + //"**/specification/*.gemspec": parseGemSpecEntries, // valid in an image context (against installed gems) } return &Cataloger{ diff --git a/syft/cataloger/bundler/parse_gemfile_lock_test.go b/syft/cataloger/bundler/parse_gemfile_lock_test.go index cc17ad0af..428a80c01 100644 --- a/syft/cataloger/bundler/parse_gemfile_lock_test.go +++ b/syft/cataloger/bundler/parse_gemfile_lock_test.go @@ -7,61 +7,62 @@ import ( "github.com/anchore/syft/syft/pkg" ) -var expected = map[string]string{ - "actionmailer": "4.1.1", - "actionpack": "4.1.1", - "actionview": "4.1.1", - "activemodel": "4.1.1", - "activerecord": "4.1.1", - "activesupport": "4.1.1", - "arel": "5.0.1.20140414130214", - "bootstrap-sass": "3.1.1.1", - "builder": "3.2.2", - "coffee-rails": "4.0.1", - "coffee-script": "2.2.0", - "coffee-script-source": "1.7.0", - "erubis": "2.7.0", - "execjs": "2.0.2", - "hike": "1.2.3", - "i18n": "0.6.9", - "jbuilder": "2.0.7", - "jquery-rails": "3.1.0", - "json": "1.8.1", - "kgio": "2.9.2", - "libv8": "3.16.14.3", - "mail": "2.5.4", - "mime-types": "1.25.1", - "minitest": "5.3.4", - "multi_json": "1.10.1", - "mysql2": "0.3.16", - "polyglot": "0.3.4", - "rack": "1.5.2", - "rack-test": "0.6.2", - "rails": "4.1.1", - "railties": "4.1.1", - "raindrops": "0.13.0", - "rake": "10.3.2", - "rdoc": "4.1.1", - "ref": "1.0.5", - "sass": "3.2.19", - "sass-rails": "4.0.3", - "sdoc": "0.4.0", - "spring": "1.1.3", - "sprockets": "2.11.0", - "sprockets-rails": "2.1.3", - "sqlite3": "1.3.9", - "therubyracer": "0.12.1", - "thor": "0.19.1", - "thread_safe": "0.3.3", - "tilt": "1.4.1", - "treetop": "1.4.15", - "turbolinks": "2.2.2", - "tzinfo": "1.2.0", - "uglifier": "2.5.0", - "unicorn": "4.8.3", -} - func TestParseGemfileLockEntries(t *testing.T) { + + var expectedGems = map[string]string{ + "actionmailer": "4.1.1", + "actionpack": "4.1.1", + "actionview": "4.1.1", + "activemodel": "4.1.1", + "activerecord": "4.1.1", + "activesupport": "4.1.1", + "arel": "5.0.1.20140414130214", + "bootstrap-sass": "3.1.1.1", + "builder": "3.2.2", + "coffee-rails": "4.0.1", + "coffee-script": "2.2.0", + "coffee-script-source": "1.7.0", + "erubis": "2.7.0", + "execjs": "2.0.2", + "hike": "1.2.3", + "i18n": "0.6.9", + "jbuilder": "2.0.7", + "jquery-rails": "3.1.0", + "json": "1.8.1", + "kgio": "2.9.2", + "libv8": "3.16.14.3", + "mail": "2.5.4", + "mime-types": "1.25.1", + "minitest": "5.3.4", + "multi_json": "1.10.1", + "mysql2": "0.3.16", + "polyglot": "0.3.4", + "rack": "1.5.2", + "rack-test": "0.6.2", + "rails": "4.1.1", + "railties": "4.1.1", + "raindrops": "0.13.0", + "rake": "10.3.2", + "rdoc": "4.1.1", + "ref": "1.0.5", + "sass": "3.2.19", + "sass-rails": "4.0.3", + "sdoc": "0.4.0", + "spring": "1.1.3", + "sprockets": "2.11.0", + "sprockets-rails": "2.1.3", + "sqlite3": "1.3.9", + "therubyracer": "0.12.1", + "thor": "0.19.1", + "thread_safe": "0.3.3", + "tilt": "1.4.1", + "treetop": "1.4.15", + "turbolinks": "2.2.2", + "tzinfo": "1.2.0", + "uglifier": "2.5.0", + "unicorn": "4.8.3", + } + fixture, err := os.Open("test-fixtures/Gemfile.lock") if err != nil { t.Fatalf("failed to open fixture: %+v", err) @@ -72,15 +73,15 @@ func TestParseGemfileLockEntries(t *testing.T) { t.Fatalf("failed to parse gemfile lock: %+v", err) } - if len(actual) != len(expected) { + if len(actual) != len(expectedGems) { for _, a := range actual { t.Log(" ", a) } - t.Fatalf("unexpected package count: %d!=%d", len(actual), len(expected)) + t.Fatalf("unexpected package count: %d!=%d", len(actual), len(expectedGems)) } for _, a := range actual { - expectedVersion, ok := expected[a.Name] + expectedVersion, ok := expectedGems[a.Name] if !ok { t.Errorf("unexpected package found: %s", a.Name) } diff --git a/syft/cataloger/bundler/parse_gemspec.go b/syft/cataloger/bundler/parse_gemspec.go new file mode 100644 index 000000000..91cfa8c01 --- /dev/null +++ b/syft/cataloger/bundler/parse_gemspec.go @@ -0,0 +1,125 @@ +package bundler + +import ( + "bufio" + "fmt" + "io" + "regexp" + "strings" + + "github.com/mitchellh/mapstructure" + + "github.com/anchore/syft/syft/cataloger/common" + "github.com/anchore/syft/syft/pkg" +) + +// integrity check +var _ common.ParserFn = parseGemfileLockEntries + +// for line in gem.splitlines(): +// line = line.strip() +// line = re.sub(r"\.freeze", "", line) + +// # look for the unicode \u{} format and try to convert to something python can use +// patt = re.match(r".*\.homepage *= *(.*) *", line) +// if patt: +// sourcepkg = json.loads(patt.group(1)) + +// patt = re.match(r".*\.licenses *= *(.*) *", line) +// if patt: +// lstr = re.sub(r"^\[|\]$", "", patt.group(1)).split(',') +// for thestr in lstr: +// thestr = re.sub(' *" *', "", thestr) +// lics.append(thestr) + +// patt = re.match(r".*\.authors *= *(.*) *", line) +// if patt: +// lstr = re.sub(r"^\[|\]$", "", patt.group(1)).split(',') +// for thestr in lstr: +// thestr = re.sub(' *" *', "", thestr) +// origins.append(thestr) + +// patt = re.match(r".*\.files *= *(.*) *", line) +// if patt: +// lstr = re.sub(r"^\[|\]$", "", patt.group(1)).split(',') +// for thestr in lstr: +// thestr = re.sub(' *" *', "", thestr) +// rfiles.append(thestr) + +type listProcessor func(string) []string + +var patterns = map[string]*regexp.Regexp{ + // match example: name = "railties".freeze ---> railties + "name": regexp.MustCompile(`.*\.name\s*=\s*["']{1}(?P.*)["']{1} *`), + // match example: version = "1.0.4".freeze ---> 1.0.4 + "version": regexp.MustCompile(`.*\.version\s*=\s*["']{1}(?P.*)["']{1} *`), + // match example: homepage = "https://github.com/anchore/syft".freeze ---> https://github.com/anchore/syft + "homepage": regexp.MustCompile(`.*\.homepage\s*=\s*["']{1}(?P.*)["']{1} *`), + // TODO: add more fields +} + +// TODO: use post processors for lists +var postProcessors = map[string]listProcessor{ + //"files": func(s string) []string { + // + //}, +} + +func parseGemspecEntries(_ string, reader io.Reader) ([]pkg.Package, error) { + var pkgs []pkg.Package + var fields = make(map[string]interface{}) + scanner := bufio.NewScanner(reader) + + for scanner.Scan() { + line := scanner.Text() + + // TODO: sanitize unicode? (see engine code) + sanitizedLine := strings.TrimSpace(line) + + if sanitizedLine == "" { + continue + } + + for field, pattern := range patterns { + matchMap := matchCaptureGroups(pattern, sanitizedLine) + if value := matchMap[field]; value != "" { + if postProcessor := postProcessors[field]; postProcessor != nil { + fields[field] = postProcessor(value) + } else { + fields[field] = value + } + // TODO: know that a line could actually match on multiple patterns, this is unlikely though + break + } + } + } + + if fields["name"] != "" && fields["version"] != "" { + var metadata pkg.GemMetadata + if err := mapstructure.Decode(fields, &metadata); err != nil { + return nil, fmt.Errorf("unable to decode gem metadata: %w", err) + } + + pkgs = append(pkgs, pkg.Package{ + Name: metadata.Name, + Version: metadata.Version, + Language: pkg.Ruby, + Type: pkg.BundlerPkg, + Metadata: metadata, + }) + } + + return pkgs, nil +} + +// matchCaptureGroups takes a regular expression and string and returns all of the named capture group results in a map. +func matchCaptureGroups(regEx *regexp.Regexp, str string) map[string]string { + match := regEx.FindStringSubmatch(str) + results := make(map[string]string) + for i, name := range regEx.SubexpNames() { + if i > 0 && i <= len(match) { + results[name] = match[i] + } + } + return results +} diff --git a/syft/cataloger/bundler/parse_gemspec_test.go b/syft/cataloger/bundler/parse_gemspec_test.go new file mode 100644 index 000000000..f05bb982b --- /dev/null +++ b/syft/cataloger/bundler/parse_gemspec_test.go @@ -0,0 +1,50 @@ +package bundler + +import ( + "os" + "testing" + + "github.com/anchore/syft/syft/pkg" +) + +func TestParseGemspec(t *testing.T) { + var expectedGems = map[string]string{ + "bundler": "2.1.4", + } + + fixture, err := os.Open("test-fixtures/bundler.gemspec") + if err != nil { + t.Fatalf("failed to open fixture: %+v", err) + } + + actual, err := parseGemspecEntries(fixture.Name(), fixture) + if err != nil { + t.Fatalf("failed to parse gemspec: %+v", err) + } + + if len(actual) != len(expectedGems) { + for _, a := range actual { + t.Log(" ", a) + } + t.Fatalf("unexpected package count: %d!=%d", len(actual), len(expectedGems)) + } + + for _, a := range actual { + expectedVersion, ok := expectedGems[a.Name] + if !ok { + t.Errorf("unexpected package found: %s", a.Name) + } + + if expectedVersion != a.Version { + t.Errorf("unexpected package version (pkg=%s): %s", a.Name, a.Version) + } + + if a.Language != pkg.Ruby { + t.Errorf("bad language (pkg=%+v): %+v", a.Name, a.Language) + } + + if a.Type != pkg.BundlerPkg { + t.Errorf("bad package type (pkg=%+v): %+v", a.Name, a.Type) + } + } +} diff --git a/syft/cataloger/bundler/test-fixtures/bundler.gemspec b/syft/cataloger/bundler/test-fixtures/bundler.gemspec new file mode 100644 index 000000000..450b81096 --- /dev/null +++ b/syft/cataloger/bundler/test-fixtures/bundler.gemspec @@ -0,0 +1,25 @@ +# frozen_string_literal: true +# -*- encoding: utf-8 -*- +# stub: bundler 2.1.4 ruby lib + +Gem::Specification.new do |s| + s.name = "bundler".freeze + s.version = "2.1.4" + + s.required_rubygems_version = Gem::Requirement.new(">= 2.5.2".freeze) if s.respond_to? :required_rubygems_version= + s.require_paths = ["lib".freeze] + s.authors = ["Andr\u00E9 Arko".freeze, "Samuel Giddins".freeze, "Colby Swandale".freeze, "Hiroshi Shibata".freeze, "David Rodr\u00EDguez".freeze, "Grey Baker".f + s.bindir = "exe".freeze + s.date = "2020-01-05" + s.description = "Bundler manages an application's dependencies through its entire life, across many machines, systematically and repeatably".freeze + s.email = ["team@bundler.io".freeze] + s.executables = ["bundle".freeze, "bundler".freeze] + s.files = ["exe/bundle".freeze, "exe/bundler".freeze] + s.homepage = "https://bundler.io".freeze + s.licenses = ["MIT".freeze] + s.required_ruby_version = Gem::Requirement.new(">= 2.3.0".freeze) + s.rubygems_version = "3.1.2".freeze + s.summary = "The best way to manage your application's dependencies".freeze + + s.installed_by_version = "3.1.2" if s.respond_to? :installed_by_version + end \ No newline at end of file diff --git a/syft/lib.go b/syft/lib.go index 7d7a7da88..e766247fb 100644 --- a/syft/lib.go +++ b/syft/lib.go @@ -62,6 +62,16 @@ func IdentifyDistro(s scope.Scope) distro.Distro { // Catalog the given scope, which may represent a container image or filesystem. Returns the discovered set of packages. func CatalogFromScope(s scope.Scope) (*pkg.Catalog, error) { log.Info("building the catalog") + + // conditionally have two sets of catalogers + //var catalogers []cataloger.Cataloger + //// if image + //// use one set of catalogers + //catalogers = ... + // + //// if dir + //// use another set of catalogers + return cataloger.Catalog(s.Resolver, cataloger.All()...) } diff --git a/syft/pkg/gem_metadata.go b/syft/pkg/gem_metadata.go new file mode 100644 index 000000000..164f6b007 --- /dev/null +++ b/syft/pkg/gem_metadata.go @@ -0,0 +1,7 @@ +package pkg + +type GemMetadata struct { + Name string `mapstructure:"name" json:"name"` + Version string `mapstructure:"version" json:"version"` + // TODO: add more fields from the gemspec +} From 1c320a8382edaba5415c8085fd222bdc902d6497 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Tue, 6 Oct 2020 11:23:54 -0400 Subject: [PATCH 2/8] defer to GenericCataloger instances for pkg catalogers Signed-off-by: Alex Goodman --- syft/cataloger/apkdb/cataloger.go | 31 ++------------ syft/cataloger/bundler/cataloger.go | 39 +++++------------ syft/cataloger/cataloger.go | 18 ++++---- syft/cataloger/common/generic_cataloger.go | 35 +++++++++------- .../common/generic_cataloger_test.go | 7 ++-- syft/cataloger/deb/cataloger.go | 17 ++++++++ .../{dpkg => deb}/parse_dpkg_status.go | 2 +- .../{dpkg => deb}/parse_dpkg_status_test.go | 2 +- .../{dpkg => deb}/test-fixtures/multiple | 0 .../{dpkg => deb}/test-fixtures/single | 0 syft/cataloger/dpkg/cataloger.go | 42 ------------------- syft/cataloger/golang/cataloger.go | 31 ++------------ syft/cataloger/java/cataloger.go | 31 ++------------ syft/cataloger/javascript/cataloger.go | 31 ++------------ syft/cataloger/python/cataloger.go | 31 ++------------ syft/cataloger/rpmdb/cataloger.go | 32 ++------------ 16 files changed, 81 insertions(+), 268 deletions(-) create mode 100644 syft/cataloger/deb/cataloger.go rename syft/cataloger/{dpkg => deb}/parse_dpkg_status.go (99%) rename syft/cataloger/{dpkg => deb}/parse_dpkg_status_test.go (99%) rename syft/cataloger/{dpkg => deb}/test-fixtures/multiple (100%) rename syft/cataloger/{dpkg => deb}/test-fixtures/single (100%) delete mode 100644 syft/cataloger/dpkg/cataloger.go diff --git a/syft/cataloger/apkdb/cataloger.go b/syft/cataloger/apkdb/cataloger.go index 8f33709f6..4511e9d57 100644 --- a/syft/cataloger/apkdb/cataloger.go +++ b/syft/cataloger/apkdb/cataloger.go @@ -4,39 +4,14 @@ Package apkdb provides a concrete Cataloger implementation for Alpine DB files. package apkdb import ( - "github.com/anchore/stereoscope/pkg/file" "github.com/anchore/syft/syft/cataloger/common" - "github.com/anchore/syft/syft/pkg" - "github.com/anchore/syft/syft/scope" ) -// Cataloger catalogs pkg.ApkPkg Package Types defined in Alpine DB files. -type Cataloger struct { - cataloger common.GenericCataloger -} - -// New returns a new Alpine DB cataloger object. -func New() *Cataloger { +// NewApkdbCataloger returns a new Alpine DB cataloger object. +func NewApkdbCataloger() *common.GenericCataloger { globParsers := map[string]common.ParserFn{ "**/lib/apk/db/installed": parseApkDB, } - return &Cataloger{ - cataloger: common.NewGenericCataloger(nil, globParsers), - } -} - -// Name returns a string that uniquely describes this cataloger. -func (a *Cataloger) Name() string { - return "apkdb-cataloger" -} - -// SelectFiles returns a set of discovered Alpine DB files from the user content source. -func (a *Cataloger) SelectFiles(resolver scope.FileResolver) []file.Reference { - return a.cataloger.SelectFiles(resolver) -} - -// Catalog returns the Packages indexed from all Alpine DB files discovered. -func (a *Cataloger) Catalog(contents map[file.Reference]string) ([]pkg.Package, error) { - return a.cataloger.Catalog(contents, a.Name()) + return common.NewGenericCataloger(nil, globParsers, "apkdb-cataloger") } diff --git a/syft/cataloger/bundler/cataloger.go b/syft/cataloger/bundler/cataloger.go index aabc4ca42..dcaced018 100644 --- a/syft/cataloger/bundler/cataloger.go +++ b/syft/cataloger/bundler/cataloger.go @@ -4,40 +4,23 @@ Package bundler provides a concrete Cataloger implementation for Ruby Gemfile.lo package bundler import ( - "github.com/anchore/stereoscope/pkg/file" "github.com/anchore/syft/syft/cataloger/common" - "github.com/anchore/syft/syft/pkg" - "github.com/anchore/syft/syft/scope" ) -// Cataloger catalogs pkg.GemPkg Package Types defined in Bundler Gemfile.lock files. -type Cataloger struct { - cataloger common.GenericCataloger -} - -// New returns a new Bundler cataloger object. -func New() *Cataloger { +// NewGemfileLockCataloger returns a new Bundler cataloger object tailored for parsing index-oriented files (e.g. Gemfile.lock). +func NewGemfileLockCataloger() *common.GenericCataloger { globParsers := map[string]common.ParserFn{ - "**/Gemfile.lock": parseGemfileLockEntries, // valid in a dir context - //"**/specification/*.gemspec": parseGemSpecEntries, // valid in an image context (against installed gems) + "**/Gemfile.lock": parseGemfileLockEntries, } - return &Cataloger{ - cataloger: common.NewGenericCataloger(nil, globParsers), + return common.NewGenericCataloger(nil, globParsers, "ruby-gemfile-cataloger") +} + +// NewGemspecCataloger returns a new Bundler cataloger object tailored for detecting installations of gems (e.g. Gemspec). +func NewGemspecCataloger() *common.GenericCataloger { + globParsers := map[string]common.ParserFn{ + "**/specification/*.gemspec": parseGemspecEntries, } -} -// Name returns a string that uniquely describes this cataloger. -func (a *Cataloger) Name() string { - return "bundler-cataloger" -} - -// SelectFiles returns a set of discovered Gemfile.lock files from the user content source. -func (a *Cataloger) SelectFiles(resolver scope.FileResolver) []file.Reference { - return a.cataloger.SelectFiles(resolver) -} - -// Catalog returns the Packages indexed from all Gemfile.lock files discovered. -func (a *Cataloger) Catalog(contents map[file.Reference]string) ([]pkg.Package, error) { - return a.cataloger.Catalog(contents, a.Name()) + return common.NewGenericCataloger(nil, globParsers, "ruby-gemspec-cataloger") } diff --git a/syft/cataloger/cataloger.go b/syft/cataloger/cataloger.go index 098445abb..48c1bc341 100644 --- a/syft/cataloger/cataloger.go +++ b/syft/cataloger/cataloger.go @@ -9,7 +9,7 @@ import ( "github.com/anchore/stereoscope/pkg/file" "github.com/anchore/syft/syft/cataloger/apkdb" "github.com/anchore/syft/syft/cataloger/bundler" - "github.com/anchore/syft/syft/cataloger/dpkg" + "github.com/anchore/syft/syft/cataloger/deb" "github.com/anchore/syft/syft/cataloger/golang" "github.com/anchore/syft/syft/cataloger/java" "github.com/anchore/syft/syft/cataloger/javascript" @@ -36,13 +36,13 @@ type Cataloger interface { // All returns a slice of all locally defined catalogers (defined in child packages). func All() []Cataloger { return []Cataloger{ - dpkg.New(), - bundler.New(), - python.New(), - rpmdb.New(), - java.New(), - apkdb.New(), - golang.New(), - javascript.New(), + deb.NewDpkgdbCataloger(), + bundler.NewGemfileLockCataloger(), + python.NewPythonCataloger(), + rpmdb.NewRpmdbCataloger(), + java.NewJavaCataloger(), + apkdb.NewApkdbCataloger(), + golang.NewGoModCataloger(), + javascript.NewJavascriptCataloger(), } } diff --git a/syft/cataloger/common/generic_cataloger.go b/syft/cataloger/common/generic_cataloger.go index 69b2a7fef..1255f0794 100644 --- a/syft/cataloger/common/generic_cataloger.go +++ b/syft/cataloger/common/generic_cataloger.go @@ -15,22 +15,29 @@ import ( // GenericCataloger implements the Catalog interface and is responsible for dispatching the proper parser function for // a given path or glob pattern. This is intended to be reusable across many package cataloger types. type GenericCataloger struct { - globParsers map[string]ParserFn - pathParsers map[string]ParserFn - selectedFiles []file.Reference - parsers map[file.Reference]ParserFn + globParsers map[string]ParserFn + pathParsers map[string]ParserFn + selectedFiles []file.Reference + parsers map[file.Reference]ParserFn + upstreamMatcher string } // NewGenericCataloger if provided path-to-parser-function and glob-to-parser-function lookups creates a GenericCataloger -func NewGenericCataloger(pathParsers map[string]ParserFn, globParsers map[string]ParserFn) GenericCataloger { - return GenericCataloger{ - globParsers: globParsers, - pathParsers: pathParsers, - selectedFiles: make([]file.Reference, 0), - parsers: make(map[file.Reference]ParserFn), +func NewGenericCataloger(pathParsers map[string]ParserFn, globParsers map[string]ParserFn, upstreamMatcher string) *GenericCataloger { + return &GenericCataloger{ + globParsers: globParsers, + pathParsers: pathParsers, + selectedFiles: make([]file.Reference, 0), + parsers: make(map[file.Reference]ParserFn), + upstreamMatcher: upstreamMatcher, } } +// Name returns a string that uniquely describes the upstream cataloger that this Generic Cataloger represents. +func (a *GenericCataloger) Name() string { + return a.upstreamMatcher +} + // register pairs a set of file references with a parser function for future cataloging (when the file contents are resolved) func (a *GenericCataloger) register(files []file.Reference, parser ParserFn) { a.selectedFiles = append(a.selectedFiles, files...) @@ -73,7 +80,7 @@ func (a *GenericCataloger) SelectFiles(resolver scope.FileResolver) []file.Refer } // Catalog takes a set of file contents and uses any configured parser functions to resolve and return discovered packages -func (a *GenericCataloger) Catalog(contents map[file.Reference]string, upstreamMatcher string) ([]pkg.Package, error) { +func (a *GenericCataloger) Catalog(contents map[file.Reference]string) ([]pkg.Package, error) { defer a.clear() packages := make([]pkg.Package, 0) @@ -81,19 +88,19 @@ func (a *GenericCataloger) Catalog(contents map[file.Reference]string, upstreamM for reference, parser := range a.parsers { content, ok := contents[reference] if !ok { - log.Errorf("cataloger '%s' missing file content: %+v", upstreamMatcher, reference) + log.Errorf("cataloger '%s' missing file content: %+v", a.upstreamMatcher, reference) continue } entries, err := parser(string(reference.Path), strings.NewReader(content)) if err != nil { // TODO: should we fail? or only log? - log.Errorf("cataloger '%s' failed to parse entries (reference=%+v): %+v", upstreamMatcher, reference, err) + log.Errorf("cataloger '%s' failed to parse entries (reference=%+v): %+v", a.upstreamMatcher, reference, err) continue } for _, entry := range entries { - entry.FoundBy = upstreamMatcher + entry.FoundBy = a.upstreamMatcher entry.Source = []file.Reference{reference} packages = append(packages, entry) diff --git a/syft/cataloger/common/generic_cataloger_test.go b/syft/cataloger/common/generic_cataloger_test.go index 849c0e820..57c724f07 100644 --- a/syft/cataloger/common/generic_cataloger_test.go +++ b/syft/cataloger/common/generic_cataloger_test.go @@ -60,9 +60,9 @@ func TestGenericCataloger(t *testing.T) { "/another-path.txt": parser, "/last/path.txt": parser, } - + upstream := "some-other-cataloger" resolver := newTestResolver() - cataloger := NewGenericCataloger(pathParsers, globParsers) + cataloger := NewGenericCataloger(pathParsers, globParsers, upstream) selected := cataloger.SelectFiles(resolver) @@ -79,7 +79,6 @@ func TestGenericCataloger(t *testing.T) { selectionByPath[string(s.Path)] = s } - upstream := "some-other-cataloger" expectedPkgs := make(map[file.Reference]pkg.Package) for path, ref := range selectionByPath { expectedPkgs[ref] = pkg.Package{ @@ -89,7 +88,7 @@ func TestGenericCataloger(t *testing.T) { } } - actualPkgs, err := cataloger.Catalog(resolver.contents, upstream) + actualPkgs, err := cataloger.Catalog(resolver.contents) if err != nil { t.Fatalf("cataloger catalog action failed: %+v", err) } diff --git a/syft/cataloger/deb/cataloger.go b/syft/cataloger/deb/cataloger.go new file mode 100644 index 000000000..b1332c572 --- /dev/null +++ b/syft/cataloger/deb/cataloger.go @@ -0,0 +1,17 @@ +/* +Package dpkg provides a concrete Cataloger implementation for Debian package DB status files. +*/ +package deb + +import ( + "github.com/anchore/syft/syft/cataloger/common" +) + +// NewDpkgdbCataloger returns a new Deb package cataloger object. +func NewDpkgdbCataloger() *common.GenericCataloger { + globParsers := map[string]common.ParserFn{ + "**/var/lib/dpkg/status": parseDpkgStatus, + } + + return common.NewGenericCataloger(nil, globParsers, "dpkgdb-cataloger") +} diff --git a/syft/cataloger/dpkg/parse_dpkg_status.go b/syft/cataloger/deb/parse_dpkg_status.go similarity index 99% rename from syft/cataloger/dpkg/parse_dpkg_status.go rename to syft/cataloger/deb/parse_dpkg_status.go index ded470170..20c3b205d 100644 --- a/syft/cataloger/dpkg/parse_dpkg_status.go +++ b/syft/cataloger/deb/parse_dpkg_status.go @@ -1,4 +1,4 @@ -package dpkg +package deb import ( "bufio" diff --git a/syft/cataloger/dpkg/parse_dpkg_status_test.go b/syft/cataloger/deb/parse_dpkg_status_test.go similarity index 99% rename from syft/cataloger/dpkg/parse_dpkg_status_test.go rename to syft/cataloger/deb/parse_dpkg_status_test.go index 1532650a4..f08e69374 100644 --- a/syft/cataloger/dpkg/parse_dpkg_status_test.go +++ b/syft/cataloger/deb/parse_dpkg_status_test.go @@ -1,4 +1,4 @@ -package dpkg +package deb import ( "bufio" diff --git a/syft/cataloger/dpkg/test-fixtures/multiple b/syft/cataloger/deb/test-fixtures/multiple similarity index 100% rename from syft/cataloger/dpkg/test-fixtures/multiple rename to syft/cataloger/deb/test-fixtures/multiple diff --git a/syft/cataloger/dpkg/test-fixtures/single b/syft/cataloger/deb/test-fixtures/single similarity index 100% rename from syft/cataloger/dpkg/test-fixtures/single rename to syft/cataloger/deb/test-fixtures/single diff --git a/syft/cataloger/dpkg/cataloger.go b/syft/cataloger/dpkg/cataloger.go deleted file mode 100644 index e45ab5aa6..000000000 --- a/syft/cataloger/dpkg/cataloger.go +++ /dev/null @@ -1,42 +0,0 @@ -/* -Package dpkg provides a concrete Cataloger implementation for Debian package DB status files. -*/ -package dpkg - -import ( - "github.com/anchore/stereoscope/pkg/file" - "github.com/anchore/syft/syft/cataloger/common" - "github.com/anchore/syft/syft/pkg" - "github.com/anchore/syft/syft/scope" -) - -// Cataloger catalogs pkg.DebPkg Package Types defined in DPKG status files. -type Cataloger struct { - cataloger common.GenericCataloger -} - -// New returns a new Deb package cataloger object. -func New() *Cataloger { - globParsers := map[string]common.ParserFn{ - "**/var/lib/dpkg/status": parseDpkgStatus, - } - - return &Cataloger{ - cataloger: common.NewGenericCataloger(nil, globParsers), - } -} - -// Name returns a string that uniquely describes this cataloger. -func (a *Cataloger) Name() string { - return "dpkg-cataloger" -} - -// SelectFiles returns a set of discovered DPKG status files from the user content source. -func (a *Cataloger) SelectFiles(resolver scope.FileResolver) []file.Reference { - return a.cataloger.SelectFiles(resolver) -} - -// Catalog returns the Packages indexed from all DPKG status files discovered. -func (a *Cataloger) Catalog(contents map[file.Reference]string) ([]pkg.Package, error) { - return a.cataloger.Catalog(contents, a.Name()) -} diff --git a/syft/cataloger/golang/cataloger.go b/syft/cataloger/golang/cataloger.go index 8616f4468..268bc1cd0 100644 --- a/syft/cataloger/golang/cataloger.go +++ b/syft/cataloger/golang/cataloger.go @@ -4,39 +4,14 @@ Package golang provides a concrete Cataloger implementation for go.mod files. package golang import ( - "github.com/anchore/stereoscope/pkg/file" "github.com/anchore/syft/syft/cataloger/common" - "github.com/anchore/syft/syft/pkg" - "github.com/anchore/syft/syft/scope" ) -// Cataloger catalogs pkg.GoModulePkg Package Types defined in go.mod files. -type Cataloger struct { - cataloger common.GenericCataloger -} - -// New returns a new Go module cataloger object. -func New() *Cataloger { +// NewGoModCataloger returns a new Go module cataloger object. +func NewGoModCataloger() *common.GenericCataloger { globParsers := map[string]common.ParserFn{ "**/go.mod": parseGoMod, } - return &Cataloger{ - cataloger: common.NewGenericCataloger(nil, globParsers), - } -} - -// Name returns a string that uniquely describes this cataloger. -func (a *Cataloger) Name() string { - return "go-cataloger" -} - -// SelectFiles returns a set of discovered go.mod files from the user content source. -func (a *Cataloger) SelectFiles(resolver scope.FileResolver) []file.Reference { - return a.cataloger.SelectFiles(resolver) -} - -// Catalog returns the Packages indexed from all go.mod files discovered. -func (a *Cataloger) Catalog(contents map[file.Reference]string) ([]pkg.Package, error) { - return a.cataloger.Catalog(contents, a.Name()) + return common.NewGenericCataloger(nil, globParsers, "go-cataloger") } diff --git a/syft/cataloger/java/cataloger.go b/syft/cataloger/java/cataloger.go index eaa5b19f9..35d776e93 100644 --- a/syft/cataloger/java/cataloger.go +++ b/syft/cataloger/java/cataloger.go @@ -4,40 +4,15 @@ Package java provides a concrete Cataloger implementation for Java archives (jar package java import ( - "github.com/anchore/stereoscope/pkg/file" "github.com/anchore/syft/syft/cataloger/common" - "github.com/anchore/syft/syft/pkg" - "github.com/anchore/syft/syft/scope" ) -// Cataloger catalogs pkg.JavaPkg and pkg.JenkinsPluginPkg Package Types defined in java archive files. -type Cataloger struct { - cataloger common.GenericCataloger -} - -// New returns a new Java archive cataloger object. -func New() *Cataloger { +// NewJavaCataloger returns a new Java archive cataloger object. +func NewJavaCataloger() *common.GenericCataloger { globParsers := make(map[string]common.ParserFn) for _, pattern := range archiveFormatGlobs { globParsers[pattern] = parseJavaArchive } - return &Cataloger{ - cataloger: common.NewGenericCataloger(nil, globParsers), - } -} - -// Name returns a string that uniquely describes this cataloger. -func (a *Cataloger) Name() string { - return "java-cataloger" -} - -// SelectFiles returns a set of discovered Java archive files from the user content source. -func (a *Cataloger) SelectFiles(resolver scope.FileResolver) []file.Reference { - return a.cataloger.SelectFiles(resolver) -} - -// Catalog returns the Packages indexed from all Java archive files discovered. -func (a *Cataloger) Catalog(contents map[file.Reference]string) ([]pkg.Package, error) { - return a.cataloger.Catalog(contents, a.Name()) + return common.NewGenericCataloger(nil, globParsers, "java-cataloger") } diff --git a/syft/cataloger/javascript/cataloger.go b/syft/cataloger/javascript/cataloger.go index e09bdf485..86aeb1acd 100644 --- a/syft/cataloger/javascript/cataloger.go +++ b/syft/cataloger/javascript/cataloger.go @@ -4,40 +4,15 @@ Package javascript provides a concrete Cataloger implementation for JavaScript e package javascript import ( - "github.com/anchore/stereoscope/pkg/file" "github.com/anchore/syft/syft/cataloger/common" - "github.com/anchore/syft/syft/pkg" - "github.com/anchore/syft/syft/scope" ) -// Cataloger catalogs pkg.YarnPkg and pkg.NpmPkg Package Types defined in package-lock.json and yarn.lock files. -type Cataloger struct { - cataloger common.GenericCataloger -} - -// New returns a new JavaScript cataloger object. -func New() *Cataloger { +// NewJavascriptCataloger returns a new JavaScript cataloger object. +func NewJavascriptCataloger() *common.GenericCataloger { globParsers := map[string]common.ParserFn{ "**/package-lock.json": parsePackageLock, "**/yarn.lock": parseYarnLock, } - return &Cataloger{ - cataloger: common.NewGenericCataloger(nil, globParsers), - } -} - -// Name returns a string that uniquely describes this cataloger. -func (a *Cataloger) Name() string { - return "javascript-cataloger" -} - -// SelectFiles returns a set of discovered Javascript ecosystem files from the user content source. -func (a *Cataloger) SelectFiles(resolver scope.FileResolver) []file.Reference { - return a.cataloger.SelectFiles(resolver) -} - -// Catalog returns the Packages indexed from all Javascript ecosystem files discovered. -func (a *Cataloger) Catalog(contents map[file.Reference]string) ([]pkg.Package, error) { - return a.cataloger.Catalog(contents, a.Name()) + return common.NewGenericCataloger(nil, globParsers, "javascript-cataloger") } diff --git a/syft/cataloger/python/cataloger.go b/syft/cataloger/python/cataloger.go index 12675b3ed..b4e9f1329 100644 --- a/syft/cataloger/python/cataloger.go +++ b/syft/cataloger/python/cataloger.go @@ -4,19 +4,11 @@ Package python provides a concrete Cataloger implementation for Python ecosystem package python import ( - "github.com/anchore/stereoscope/pkg/file" "github.com/anchore/syft/syft/cataloger/common" - "github.com/anchore/syft/syft/pkg" - "github.com/anchore/syft/syft/scope" ) -// Cataloger catalogs pkg.WheelPkg, pkg.EggPkg, and pkg.PythonRequirementsPkg Package Types defined in Python ecosystem files. -type Cataloger struct { - cataloger common.GenericCataloger -} - -// New returns a new Python cataloger object. -func New() *Cataloger { +// NewPythonCataloger returns a new Python cataloger object. +func NewPythonCataloger() *common.GenericCataloger { globParsers := map[string]common.ParserFn{ "**/*egg-info/PKG-INFO": parseEggMetadata, "**/*dist-info/METADATA": parseWheelMetadata, @@ -25,22 +17,5 @@ func New() *Cataloger { "**/setup.py": parseSetup, } - return &Cataloger{ - cataloger: common.NewGenericCataloger(nil, globParsers), - } -} - -// Name returns a string that uniquely describes this cataloger. -func (a *Cataloger) Name() string { - return "python-cataloger" -} - -// SelectFiles returns a set of discovered Python ecosystem files from the user content source. -func (a *Cataloger) SelectFiles(resolver scope.FileResolver) []file.Reference { - return a.cataloger.SelectFiles(resolver) -} - -// Catalog returns the Packages indexed from all Python ecosystem files discovered. -func (a *Cataloger) Catalog(contents map[file.Reference]string) ([]pkg.Package, error) { - return a.cataloger.Catalog(contents, a.Name()) + return common.NewGenericCataloger(nil, globParsers, "python-cataloger") } diff --git a/syft/cataloger/rpmdb/cataloger.go b/syft/cataloger/rpmdb/cataloger.go index db9a5e867..7c331dade 100644 --- a/syft/cataloger/rpmdb/cataloger.go +++ b/syft/cataloger/rpmdb/cataloger.go @@ -4,39 +4,13 @@ Package rpmdb provides a concrete Cataloger implementation for RPM "Package" DB package rpmdb import ( - "github.com/anchore/stereoscope/pkg/file" "github.com/anchore/syft/syft/cataloger/common" - "github.com/anchore/syft/syft/pkg" - "github.com/anchore/syft/syft/scope" ) -// Cataloger catalogs pkg.RpmPkg Package Types defined in RPM DB files. -type Cataloger struct { - cataloger common.GenericCataloger -} - -// New returns a new RPM DB cataloger object. -func New() *Cataloger { +// NewRpmdbCataloger returns a new RPM DB cataloger object. +func NewRpmdbCataloger() *common.GenericCataloger { globParsers := map[string]common.ParserFn{ "**/var/lib/rpm/Packages": parseRpmDB, } - - return &Cataloger{ - cataloger: common.NewGenericCataloger(nil, globParsers), - } -} - -// Name returns a string that uniquely describes this cataloger. -func (a *Cataloger) Name() string { - return "rpmdb-cataloger" -} - -// SelectFiles returns a set of discovered RPM DB files from the user content source. -func (a *Cataloger) SelectFiles(resolver scope.FileResolver) []file.Reference { - return a.cataloger.SelectFiles(resolver) -} - -// Catalog returns the Packages indexed from all RPM DB files discovered. -func (a *Cataloger) Catalog(contents map[file.Reference]string) ([]pkg.Package, error) { - return a.cataloger.Catalog(contents, a.Name()) + return common.NewGenericCataloger(nil, globParsers, "rpmdb-cataloger") } From 10b44f5311767a321404a24f36ec03b3f86d7dfd Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Tue, 6 Oct 2020 13:11:35 -0400 Subject: [PATCH 3/8] split catalogers into two sets, one for images another for directory scans Signed-off-by: Alex Goodman --- Makefile | 2 +- syft/cataloger/bundler/parse_gemfile_lock.go | 2 +- .../bundler/parse_gemfile_lock_test.go | 2 +- syft/cataloger/bundler/parse_gemspec.go | 2 +- syft/cataloger/bundler/parse_gemspec_test.go | 2 +- syft/cataloger/cataloger.go | 24 +++- syft/lib.go | 20 +-- syft/pkg/package_test.go | 5 +- syft/pkg/type.go | 6 +- syft/presenter/cyclonedx/presenter.go | 4 +- syft/presenter/json/location.go | 3 +- syft/presenter/json/source.go | 5 +- syft/presenter/text/presenter.go | 3 +- syft/scope/scope.go | 52 +++---- syft/scope/scope_test.go | 46 +++--- test/integration/json_schema_test.go | 8 ++ test/integration/pkg_cases.go | 136 ++++++++++-------- test/integration/pkg_coverage_test.go | 20 ++- .../ruby/specification/bundler.gemspec | 25 ++++ 19 files changed, 218 insertions(+), 149 deletions(-) create mode 100644 test/integration/test-fixtures/image-pkg-coverage/ruby/specification/bundler.gemspec diff --git a/Makefile b/Makefile index ade92a5e0..e421c5050 100644 --- a/Makefile +++ b/Makefile @@ -130,7 +130,7 @@ unit: fixtures ## Run unit tests (with coverage) .PHONY: integration integration: ## Run integration tests $(call title,Running integration tests) - go test -v -tags=integration ./test/integration + go test -tags=integration ./test/integration # note: this is used by CI to determine if the integration test fixture cache (docker image tars) should be busted integration-fingerprint: diff --git a/syft/cataloger/bundler/parse_gemfile_lock.go b/syft/cataloger/bundler/parse_gemfile_lock.go index d59274289..76e8990e5 100644 --- a/syft/cataloger/bundler/parse_gemfile_lock.go +++ b/syft/cataloger/bundler/parse_gemfile_lock.go @@ -44,7 +44,7 @@ func parseGemfileLockEntries(_ string, reader io.Reader) ([]pkg.Package, error) Name: candidate[0], Version: strings.Trim(candidate[1], "()"), Language: pkg.Ruby, - Type: pkg.BundlerPkg, + Type: pkg.GemPkg, }) } } diff --git a/syft/cataloger/bundler/parse_gemfile_lock_test.go b/syft/cataloger/bundler/parse_gemfile_lock_test.go index 428a80c01..4e3faf7e5 100644 --- a/syft/cataloger/bundler/parse_gemfile_lock_test.go +++ b/syft/cataloger/bundler/parse_gemfile_lock_test.go @@ -94,7 +94,7 @@ func TestParseGemfileLockEntries(t *testing.T) { t.Errorf("bad language (pkg=%+v): %+v", a.Name, a.Language) } - if a.Type != pkg.BundlerPkg { + if a.Type != pkg.GemPkg { t.Errorf("bad package type (pkg=%+v): %+v", a.Name, a.Type) } } diff --git a/syft/cataloger/bundler/parse_gemspec.go b/syft/cataloger/bundler/parse_gemspec.go index 91cfa8c01..728668899 100644 --- a/syft/cataloger/bundler/parse_gemspec.go +++ b/syft/cataloger/bundler/parse_gemspec.go @@ -104,7 +104,7 @@ func parseGemspecEntries(_ string, reader io.Reader) ([]pkg.Package, error) { Name: metadata.Name, Version: metadata.Version, Language: pkg.Ruby, - Type: pkg.BundlerPkg, + Type: pkg.GemPkg, Metadata: metadata, }) } diff --git a/syft/cataloger/bundler/parse_gemspec_test.go b/syft/cataloger/bundler/parse_gemspec_test.go index f05bb982b..b792405b3 100644 --- a/syft/cataloger/bundler/parse_gemspec_test.go +++ b/syft/cataloger/bundler/parse_gemspec_test.go @@ -43,7 +43,7 @@ func TestParseGemspec(t *testing.T) { t.Errorf("bad language (pkg=%+v): %+v", a.Name, a.Language) } - if a.Type != pkg.BundlerPkg { + if a.Type != pkg.GemPkg { t.Errorf("bad package type (pkg=%+v): %+v", a.Name, a.Type) } } diff --git a/syft/cataloger/cataloger.go b/syft/cataloger/cataloger.go index 48c1bc341..b4987f236 100644 --- a/syft/cataloger/cataloger.go +++ b/syft/cataloger/cataloger.go @@ -33,16 +33,30 @@ type Cataloger interface { // TODO: we should consider refactoring to return a set of io.Readers instead of the full contents themselves (allow for optional buffering). } -// All returns a slice of all locally defined catalogers (defined in child packages). -func All() []Cataloger { +// ImageCatalogers returns a slice of locally implemented catalogers that are fit for detecting installations of packages. +func ImageCatalogers() []Cataloger { return []Cataloger{ + bundler.NewGemspecCataloger(), + python.NewPythonCataloger(), // TODO: split and replace me + javascript.NewJavascriptCataloger(), // TODO: split and replace me + deb.NewDpkgdbCataloger(), + rpmdb.NewRpmdbCataloger(), + java.NewJavaCataloger(), + apkdb.NewApkdbCataloger(), + golang.NewGoModCataloger(), + } +} + +// DirectoryCatalogers returns a slice of locally implemented catalogers that are fit for detecting packages from index files (and select installations) +func DirectoryCatalogers() []Cataloger { + return []Cataloger{ + bundler.NewGemfileLockCataloger(), + python.NewPythonCataloger(), // TODO: split and replace me + javascript.NewJavascriptCataloger(), // TODO: split and replace me deb.NewDpkgdbCataloger(), - bundler.NewGemfileLockCataloger(), - python.NewPythonCataloger(), rpmdb.NewRpmdbCataloger(), java.NewJavaCataloger(), apkdb.NewApkdbCataloger(), golang.NewGoModCataloger(), - javascript.NewJavascriptCataloger(), } } diff --git a/syft/lib.go b/syft/lib.go index e766247fb..cb7dc66a1 100644 --- a/syft/lib.go +++ b/syft/lib.go @@ -17,6 +17,8 @@ Similar to the cataloging process, Linux distribution identification is also per package syft import ( + "fmt" + "github.com/anchore/syft/internal/bus" "github.com/anchore/syft/internal/log" "github.com/anchore/syft/syft/cataloger" @@ -64,15 +66,17 @@ func CatalogFromScope(s scope.Scope) (*pkg.Catalog, error) { log.Info("building the catalog") // conditionally have two sets of catalogers - //var catalogers []cataloger.Cataloger - //// if image - //// use one set of catalogers - //catalogers = ... - // - //// if dir - //// use another set of catalogers + var catalogers []cataloger.Cataloger + switch s.Scheme { + case scope.ImageScheme: + catalogers = cataloger.ImageCatalogers() + case scope.DirectoryScheme: + catalogers = cataloger.DirectoryCatalogers() + default: + return nil, fmt.Errorf("unable to determine cataloger set from scheme=%+v", s.Scheme) + } - return cataloger.Catalog(s.Resolver, cataloger.All()...) + return cataloger.Catalog(s.Resolver, catalogers...) } // SetLogger sets the logger object used for all syft logging calls. diff --git a/syft/pkg/package_test.go b/syft/pkg/package_test.go index 89a712eee..220146725 100644 --- a/syft/pkg/package_test.go +++ b/syft/pkg/package_test.go @@ -1,9 +1,10 @@ package pkg import ( + "testing" + "github.com/anchore/syft/syft/distro" "github.com/sergi/go-diff/diffmatchpatch" - "testing" ) func TestPackage_pURL(t *testing.T) { @@ -56,7 +57,7 @@ func TestPackage_pURL(t *testing.T) { pkg: Package{ Name: "name", Version: "v0.1.0", - Type: BundlerPkg, + Type: GemPkg, }, expected: "pkg:gem/name@v0.1.0", }, diff --git a/syft/pkg/type.go b/syft/pkg/type.go index 08a126c8f..61ed24878 100644 --- a/syft/pkg/type.go +++ b/syft/pkg/type.go @@ -8,7 +8,7 @@ type Type string const ( UnknownPkg Type = "UnknownPackage" ApkPkg Type = "apk" - BundlerPkg Type = "bundle" + GemPkg Type = "gem" DebPkg Type = "deb" EggPkg Type = "egg" // PacmanPkg Type = "pacman" @@ -26,7 +26,7 @@ const ( var AllPkgs = []Type{ ApkPkg, - BundlerPkg, + GemPkg, DebPkg, EggPkg, // PacmanPkg, @@ -45,7 +45,7 @@ func (t Type) PackageURLType() string { switch t { case ApkPkg: return "alpine" - case BundlerPkg: + case GemPkg: return packageurl.TypeGem case DebPkg: return "deb" diff --git a/syft/presenter/cyclonedx/presenter.go b/syft/presenter/cyclonedx/presenter.go index cf02716d1..1b8accebe 100644 --- a/syft/presenter/cyclonedx/presenter.go +++ b/syft/presenter/cyclonedx/presenter.go @@ -34,9 +34,7 @@ func NewPresenter(catalog *pkg.Catalog, s scope.Scope, d distro.Distro) *Present func (pres *Presenter) Present(output io.Writer) error { bom := NewDocumentFromCatalog(pres.catalog, pres.distro) - srcObj := pres.scope.Source() - - switch src := srcObj.(type) { + switch src := pres.scope.Source.(type) { case scope.DirSource: bom.BomDescriptor.Component = &BdComponent{ Component: Component{ diff --git a/syft/presenter/json/location.go b/syft/presenter/json/location.go index 1987ec297..8a0fe1209 100644 --- a/syft/presenter/json/location.go +++ b/syft/presenter/json/location.go @@ -15,8 +15,7 @@ type ImageLocation struct { } func NewLocations(p *pkg.Package, s scope.Scope) (Locations, error) { - srcObj := s.Source() - switch src := srcObj.(type) { + switch src := s.Source.(type) { case scope.ImageSource: locations := make([]ImageLocation, len(p.Source)) for idx := range p.Source { diff --git a/syft/presenter/json/source.go b/syft/presenter/json/source.go index a44ad8676..471390999 100644 --- a/syft/presenter/json/source.go +++ b/syft/presenter/json/source.go @@ -12,8 +12,7 @@ type Source struct { } func NewSource(s scope.Scope) (Source, error) { - srcObj := s.Source() - switch src := srcObj.(type) { + switch src := s.Source.(type) { case scope.ImageSource: return Source{ Type: "image", @@ -22,7 +21,7 @@ func NewSource(s scope.Scope) (Source, error) { case scope.DirSource: return Source{ Type: "directory", - Target: s.DirSrc.Path, + Target: src.Path, }, nil default: return Source{}, fmt.Errorf("unsupported source: %T", src) diff --git a/syft/presenter/text/presenter.go b/syft/presenter/text/presenter.go index 2376f1aac..3291370d9 100644 --- a/syft/presenter/text/presenter.go +++ b/syft/presenter/text/presenter.go @@ -27,9 +27,8 @@ func (pres *Presenter) Present(output io.Writer) error { // init the tabular writer w := new(tabwriter.Writer) w.Init(output, 0, 8, 0, '\t', tabwriter.AlignRight) - srcObj := pres.scope.Source() - switch src := srcObj.(type) { + switch src := pres.scope.Source.(type) { case scope.DirSource: fmt.Fprintln(w, fmt.Sprintf("[Path: %s]", src.Path)) case scope.ImageSource: diff --git a/syft/scope/scope.go b/syft/scope/scope.go index 88406d4eb..bf2ed74f3 100644 --- a/syft/scope/scope.go +++ b/syft/scope/scope.go @@ -20,12 +20,12 @@ import ( ) const ( - unknownScheme scheme = "unknown-scheme" - directoryScheme scheme = "directory-scheme" - imageScheme scheme = "image-scheme" + UnknownScheme Scheme = "unknown-scheme" + DirectoryScheme Scheme = "directory-scheme" + ImageScheme Scheme = "image-scheme" ) -type scheme string +type Scheme string // ImageSource represents a data source that is a container image type ImageSource struct { @@ -42,8 +42,8 @@ type DirSource struct { type Scope struct { Option Option // specific perspective to catalog Resolver Resolver // a Resolver object to use in file path/glob resolution and file contents resolution - ImgSrc ImageSource // the specific image to be cataloged - DirSrc DirSource // the specific directory to be cataloged + Source interface{} // the specific source object to be cataloged + Scheme Scheme // the source data scheme type (directory or image) } // NewScope produces a Scope based on userInput like dir: or image:tag @@ -55,7 +55,7 @@ func NewScope(userInput string, o Option) (Scope, func(), error) { } switch parsedScheme { - case directoryScheme: + case DirectoryScheme: fileMeta, err := fs.Stat(location) if err != nil { return Scope{}, func() {}, fmt.Errorf("unable to stat dir=%q: %w", location, err) @@ -71,7 +71,7 @@ func NewScope(userInput string, o Option) (Scope, func(), error) { } return s, func() {}, nil - case imageScheme: + case ImageScheme: img, err := stereoscope.GetImage(location) cleanup := func() { stereoscope.Cleanup() @@ -97,9 +97,10 @@ func NewScopeFromDir(path string) (Scope, error) { Resolver: &resolvers.DirectoryResolver{ Path: path, }, - DirSrc: DirSource{ + Source: DirSource{ Path: path, }, + Scheme: DirectoryScheme, }, nil } @@ -118,59 +119,48 @@ func NewScopeFromImage(img *image.Image, option Option) (Scope, error) { return Scope{ Option: option, Resolver: resolver, - ImgSrc: ImageSource{ + Source: ImageSource{ Img: img, }, + Scheme: ImageScheme, }, nil } -// Source returns the configured data source (either a dir source or container image source) -func (s Scope) Source() interface{} { - if s.ImgSrc != (ImageSource{}) { - return s.ImgSrc - } - if s.DirSrc != (DirSource{}) { - return s.DirSrc - } - - return nil -} - type sourceDetector func(string) (image.Source, string, error) -func detectScheme(fs afero.Fs, imageDetector sourceDetector, userInput string) (scheme, string, error) { +func detectScheme(fs afero.Fs, imageDetector sourceDetector, userInput string) (Scheme, string, error) { if strings.HasPrefix(userInput, "dir:") { // blindly trust the user's scheme dirLocation, err := homedir.Expand(strings.TrimPrefix(userInput, "dir:")) if err != nil { - return unknownScheme, "", fmt.Errorf("unable to expand directory path: %w", err) + return UnknownScheme, "", fmt.Errorf("unable to expand directory path: %w", err) } - return directoryScheme, dirLocation, nil + return DirectoryScheme, dirLocation, nil } // we should attempt to let stereoscope determine what the source is first --just because the source is a valid directory // doesn't mean we yet know if it is an OCI layout directory (to be treated as an image) or if it is a generic filesystem directory. source, imageSpec, err := imageDetector(userInput) if err != nil { - return unknownScheme, "", fmt.Errorf("unable to detect the scheme from %q: %w", userInput, err) + return UnknownScheme, "", fmt.Errorf("unable to detect the scheme from %q: %w", userInput, err) } if source == image.UnknownSource { dirLocation, err := homedir.Expand(userInput) if err != nil { - return unknownScheme, "", fmt.Errorf("unable to expand potential directory path: %w", err) + return UnknownScheme, "", fmt.Errorf("unable to expand potential directory path: %w", err) } fileMeta, err := fs.Stat(dirLocation) if err != nil { - return unknownScheme, "", nil + return UnknownScheme, "", nil } if fileMeta.IsDir() { - return directoryScheme, dirLocation, nil + return DirectoryScheme, dirLocation, nil } - return unknownScheme, "", nil + return UnknownScheme, "", nil } - return imageScheme, imageSpec, nil + return ImageScheme, imageSpec, nil } diff --git a/syft/scope/scope_test.go b/syft/scope/scope_test.go index 017d45e0a..deb55c1d5 100644 --- a/syft/scope/scope_test.go +++ b/syft/scope/scope_test.go @@ -1,13 +1,13 @@ package scope import ( - "github.com/mitchellh/go-homedir" - "github.com/spf13/afero" "os" "testing" "github.com/anchore/stereoscope/pkg/file" "github.com/anchore/stereoscope/pkg/image" + "github.com/mitchellh/go-homedir" + "github.com/spf13/afero" ) func TestNewScopeFromImageFails(t *testing.T) { @@ -78,8 +78,8 @@ func TestDirectoryScope(t *testing.T) { if err != nil { t.Errorf("could not create NewDirScope: %w", err) } - if p.DirSrc.Path != test.input { - t.Errorf("mismatched stringer: '%s' != '%s'", p.DirSrc.Path, test.input) + if p.Source.(DirSource).Path != test.input { + t.Errorf("mismatched stringer: '%s' != '%s'", p.Source.(DirSource).Path, test.input) } refs, err := p.Resolver.FilesByPath(test.inputPaths...) @@ -229,7 +229,7 @@ func TestDetectScheme(t *testing.T) { userInput string dirs []string detection detectorResult - expectedScheme scheme + expectedScheme Scheme expectedLocation string }{ { @@ -239,7 +239,7 @@ func TestDetectScheme(t *testing.T) { src: image.DockerDaemonSource, ref: "wagoodman/dive:latest", }, - expectedScheme: imageScheme, + expectedScheme: ImageScheme, expectedLocation: "wagoodman/dive:latest", }, { @@ -249,7 +249,7 @@ func TestDetectScheme(t *testing.T) { src: image.DockerDaemonSource, ref: "wagoodman/dive", }, - expectedScheme: imageScheme, + expectedScheme: ImageScheme, expectedLocation: "wagoodman/dive", }, { @@ -259,7 +259,7 @@ func TestDetectScheme(t *testing.T) { src: image.DockerDaemonSource, ref: "wagoodman/dive:latest", }, - expectedScheme: imageScheme, + expectedScheme: ImageScheme, expectedLocation: "wagoodman/dive:latest", }, { @@ -269,7 +269,7 @@ func TestDetectScheme(t *testing.T) { src: image.DockerDaemonSource, ref: "wagoodman/dive", }, - expectedScheme: imageScheme, + expectedScheme: ImageScheme, expectedLocation: "wagoodman/dive", }, { @@ -279,7 +279,7 @@ func TestDetectScheme(t *testing.T) { src: image.DockerDaemonSource, ref: "latest", }, - expectedScheme: imageScheme, + expectedScheme: ImageScheme, // we want to be able to handle this case better, however, I don't see a way to do this // the user will need to provide more explicit input (docker:docker:latest) expectedLocation: "latest", @@ -291,7 +291,7 @@ func TestDetectScheme(t *testing.T) { src: image.DockerDaemonSource, ref: "docker:latest", }, - expectedScheme: imageScheme, + expectedScheme: ImageScheme, // we want to be able to handle this case better, however, I don't see a way to do this // the user will need to provide more explicit input (docker:docker:latest) expectedLocation: "docker:latest", @@ -303,7 +303,7 @@ func TestDetectScheme(t *testing.T) { src: image.OciTarballSource, ref: "some/path-to-file", }, - expectedScheme: imageScheme, + expectedScheme: ImageScheme, expectedLocation: "some/path-to-file", }, { @@ -314,7 +314,7 @@ func TestDetectScheme(t *testing.T) { ref: "some/path-to-dir", }, dirs: []string{"some/path-to-dir"}, - expectedScheme: imageScheme, + expectedScheme: ImageScheme, expectedLocation: "some/path-to-dir", }, { @@ -325,7 +325,7 @@ func TestDetectScheme(t *testing.T) { ref: "", }, dirs: []string{"some/path-to-dir"}, - expectedScheme: directoryScheme, + expectedScheme: DirectoryScheme, expectedLocation: "some/path-to-dir", }, { @@ -335,7 +335,7 @@ func TestDetectScheme(t *testing.T) { src: image.DockerDaemonSource, ref: "some/path-to-dir", }, - expectedScheme: imageScheme, + expectedScheme: ImageScheme, expectedLocation: "some/path-to-dir", }, { @@ -346,7 +346,7 @@ func TestDetectScheme(t *testing.T) { ref: "", }, dirs: []string{"some/path-to-dir"}, - expectedScheme: directoryScheme, + expectedScheme: DirectoryScheme, expectedLocation: "some/path-to-dir", }, { @@ -356,7 +356,7 @@ func TestDetectScheme(t *testing.T) { src: image.UnknownSource, ref: "", }, - expectedScheme: directoryScheme, + expectedScheme: DirectoryScheme, expectedLocation: ".", }, { @@ -366,7 +366,7 @@ func TestDetectScheme(t *testing.T) { src: image.UnknownSource, ref: "", }, - expectedScheme: directoryScheme, + expectedScheme: DirectoryScheme, expectedLocation: ".", }, // we should support tilde expansion @@ -377,7 +377,7 @@ func TestDetectScheme(t *testing.T) { src: image.OciDirectorySource, ref: "~/some-path", }, - expectedScheme: imageScheme, + expectedScheme: ImageScheme, expectedLocation: "~/some-path", }, { @@ -388,26 +388,26 @@ func TestDetectScheme(t *testing.T) { ref: "", }, dirs: []string{"~/some-path"}, - expectedScheme: directoryScheme, + expectedScheme: DirectoryScheme, expectedLocation: "~/some-path", }, { name: "tilde-expansion-dir-explicit-exists", userInput: "dir:~/some-path", dirs: []string{"~/some-path"}, - expectedScheme: directoryScheme, + expectedScheme: DirectoryScheme, expectedLocation: "~/some-path", }, { name: "tilde-expansion-dir-explicit-dne", userInput: "dir:~/some-path", - expectedScheme: directoryScheme, + expectedScheme: DirectoryScheme, expectedLocation: "~/some-path", }, { name: "tilde-expansion-dir-implicit-dne", userInput: "~/some-path", - expectedScheme: unknownScheme, + expectedScheme: UnknownScheme, expectedLocation: "", }, } diff --git a/test/integration/json_schema_test.go b/test/integration/json_schema_test.go index 63e93b0c6..8545c0978 100644 --- a/test/integration/json_schema_test.go +++ b/test/integration/json_schema_test.go @@ -108,6 +108,10 @@ func TestJsonSchemaImg(t *testing.T) { t.Fatalf("failed to catalog image: %+v", err) } + var cases []testCase + cases = append(cases, commonTestCases...) + cases = append(cases, imageOnlyTestCases...) + for _, c := range cases { t.Run(c.name, func(t *testing.T) { testJsonSchema(t, catalog, theScope, "img") @@ -121,6 +125,10 @@ func TestJsonSchemaDirs(t *testing.T) { t.Errorf("unable to create scope from dir: %+v", err) } + var cases []testCase + cases = append(cases, commonTestCases...) + cases = append(cases, dirOnlyTestCases...) + for _, c := range cases { t.Run(c.name, func(t *testing.T) { testJsonSchema(t, catalog, theScope, "dir") diff --git a/test/integration/pkg_cases.go b/test/integration/pkg_cases.go index 6981304d2..54762a151 100644 --- a/test/integration/pkg_cases.go +++ b/test/integration/pkg_cases.go @@ -4,12 +4,86 @@ package integration import "github.com/anchore/syft/syft/pkg" -var cases = []struct { +type testCase struct { name string pkgType pkg.Type pkgLanguage pkg.Language pkgInfo map[string]string -}{ +} + +var imageOnlyTestCases = []testCase{ + { + name: "find gemspec packages", + pkgType: pkg.GemPkg, + pkgLanguage: pkg.Ruby, + pkgInfo: map[string]string{ + "bundler": "2.1.4", + }, + }, +} + +var dirOnlyTestCases = []testCase{ + { + name: "find gemfile packages", + pkgType: pkg.GemPkg, + pkgLanguage: pkg.Ruby, + pkgInfo: map[string]string{ + "actionmailer": "4.1.1", + "actionpack": "4.1.1", + "actionview": "4.1.1", + "activemodel": "4.1.1", + "activerecord": "4.1.1", + "activesupport": "4.1.1", + "arel": "5.0.1.20140414130214", + "bootstrap-sass": "3.1.1.1", + "builder": "3.2.2", + "coffee-rails": "4.0.1", + "coffee-script": "2.2.0", + "coffee-script-source": "1.7.0", + "erubis": "2.7.0", + "execjs": "2.0.2", + "hike": "1.2.3", + "i18n": "0.6.9", + "jbuilder": "2.0.7", + "jquery-rails": "3.1.0", + "json": "1.8.1", + "kgio": "2.9.2", + "libv8": "3.16.14.3", + "mail": "2.5.4", + "mime-types": "1.25.1", + "minitest": "5.3.4", + "multi_json": "1.10.1", + "mysql2": "0.3.16", + "polyglot": "0.3.4", + "rack": "1.5.2", + "rack-test": "0.6.2", + "rails": "4.1.1", + "railties": "4.1.1", + "raindrops": "0.13.0", + "rake": "10.3.2", + "rdoc": "4.1.1", + "ref": "1.0.5", + "sass": "3.2.19", + "sass-rails": "4.0.3", + "sdoc": "0.4.0", + "spring": "1.1.3", + "sprockets": "2.11.0", + "sprockets-rails": "2.1.3", + "sqlite3": "1.3.9", + "therubyracer": "0.12.1", + "thor": "0.19.1", + "thread_safe": "0.3.3", + "tilt": "1.4.1", + "treetop": "1.4.15", + "turbolinks": "2.2.2", + "tzinfo": "1.2.0", + "uglifier": "2.5.0", + "unicorn": "4.8.3", + }, + }, +} + +var commonTestCases = []testCase{ { name: "find rpmdb packages", pkgType: pkg.RpmPkg, @@ -98,64 +172,6 @@ var cases = []struct { "mypy": "v0.770", }, }, - { - name: "find bundler packages", - pkgType: pkg.BundlerPkg, - pkgLanguage: pkg.Ruby, - pkgInfo: map[string]string{ - "actionmailer": "4.1.1", - "actionpack": "4.1.1", - "actionview": "4.1.1", - "activemodel": "4.1.1", - "activerecord": "4.1.1", - "activesupport": "4.1.1", - "arel": "5.0.1.20140414130214", - "bootstrap-sass": "3.1.1.1", - "builder": "3.2.2", - "coffee-rails": "4.0.1", - "coffee-script": "2.2.0", - "coffee-script-source": "1.7.0", - "erubis": "2.7.0", - "execjs": "2.0.2", - "hike": "1.2.3", - "i18n": "0.6.9", - "jbuilder": "2.0.7", - "jquery-rails": "3.1.0", - "json": "1.8.1", - "kgio": "2.9.2", - "libv8": "3.16.14.3", - "mail": "2.5.4", - "mime-types": "1.25.1", - "minitest": "5.3.4", - "multi_json": "1.10.1", - "mysql2": "0.3.16", - "polyglot": "0.3.4", - "rack": "1.5.2", - "rack-test": "0.6.2", - "rails": "4.1.1", - "railties": "4.1.1", - "raindrops": "0.13.0", - "rake": "10.3.2", - "rdoc": "4.1.1", - "ref": "1.0.5", - "sass": "3.2.19", - "sass-rails": "4.0.3", - "sdoc": "0.4.0", - "spring": "1.1.3", - "sprockets": "2.11.0", - "sprockets-rails": "2.1.3", - "sqlite3": "1.3.9", - "therubyracer": "0.12.1", - "thor": "0.19.1", - "thread_safe": "0.3.3", - "tilt": "1.4.1", - "treetop": "1.4.15", - "turbolinks": "2.2.2", - "tzinfo": "1.2.0", - "uglifier": "2.5.0", - "unicorn": "4.8.3", - }, - }, { name: "find apkdb packages", diff --git a/test/integration/pkg_coverage_test.go b/test/integration/pkg_coverage_test.go index 333c03bc6..f79fbd55d 100644 --- a/test/integration/pkg_coverage_test.go +++ b/test/integration/pkg_coverage_test.go @@ -3,9 +3,11 @@ package integration import ( - "github.com/anchore/stereoscope/pkg/imagetest" "testing" + "github.com/anchore/stereoscope/pkg/imagetest" + "github.com/go-test/deep" + "github.com/anchore/syft/internal" "github.com/anchore/syft/syft" "github.com/anchore/syft/syft/pkg" @@ -35,6 +37,10 @@ func TestPkgCoverageImage(t *testing.T) { definedPkgs.Add(string(p)) } + var cases []testCase + cases = append(cases, commonTestCases...) + cases = append(cases, imageOnlyTestCases...) + for _, c := range cases { t.Run(c.name, func(t *testing.T) { pkgCount := 0 @@ -81,10 +87,16 @@ func TestPkgCoverageImage(t *testing.T) { // ensure that integration test cases stay in sync with the available catalogers if len(observedLanguages) < len(definedLanguages) { t.Errorf("language coverage incomplete (languages=%d, coverage=%d)", len(definedLanguages), len(observedLanguages)) + for _, d := range deep.Equal(observedLanguages, definedLanguages) { + t.Errorf("diff: %+v", d) + } } if len(observedPkgs) < len(definedPkgs) { t.Errorf("package coverage incomplete (packages=%d, coverage=%d)", len(definedPkgs), len(observedPkgs)) + for _, d := range deep.Equal(observedPkgs, definedPkgs) { + t.Errorf("diff: %+v", d) + } } } @@ -107,6 +119,10 @@ func TestPkgCoverageDirectory(t *testing.T) { definedPkgs.Add(string(p)) } + var cases []testCase + cases = append(cases, commonTestCases...) + cases = append(cases, dirOnlyTestCases...) + for _, c := range cases { t.Run(c.name, func(t *testing.T) { pkgCount := 0 @@ -150,7 +166,7 @@ func TestPkgCoverageDirectory(t *testing.T) { observedPkgs.Remove(string(pkg.UnknownPkg)) definedPkgs.Remove(string(pkg.UnknownPkg)) - // ensure that integration test cases stay in sync with the available catalogers + // ensure that integration test commonTestCases stay in sync with the available catalogers if len(observedLanguages) < len(definedLanguages) { t.Errorf("language coverage incomplete (languages=%d, coverage=%d)", len(definedLanguages), len(observedLanguages)) } diff --git a/test/integration/test-fixtures/image-pkg-coverage/ruby/specification/bundler.gemspec b/test/integration/test-fixtures/image-pkg-coverage/ruby/specification/bundler.gemspec new file mode 100644 index 000000000..450b81096 --- /dev/null +++ b/test/integration/test-fixtures/image-pkg-coverage/ruby/specification/bundler.gemspec @@ -0,0 +1,25 @@ +# frozen_string_literal: true +# -*- encoding: utf-8 -*- +# stub: bundler 2.1.4 ruby lib + +Gem::Specification.new do |s| + s.name = "bundler".freeze + s.version = "2.1.4" + + s.required_rubygems_version = Gem::Requirement.new(">= 2.5.2".freeze) if s.respond_to? :required_rubygems_version= + s.require_paths = ["lib".freeze] + s.authors = ["Andr\u00E9 Arko".freeze, "Samuel Giddins".freeze, "Colby Swandale".freeze, "Hiroshi Shibata".freeze, "David Rodr\u00EDguez".freeze, "Grey Baker".f + s.bindir = "exe".freeze + s.date = "2020-01-05" + s.description = "Bundler manages an application's dependencies through its entire life, across many machines, systematically and repeatably".freeze + s.email = ["team@bundler.io".freeze] + s.executables = ["bundle".freeze, "bundler".freeze] + s.files = ["exe/bundle".freeze, "exe/bundler".freeze] + s.homepage = "https://bundler.io".freeze + s.licenses = ["MIT".freeze] + s.required_ruby_version = Gem::Requirement.new(">= 2.3.0".freeze) + s.rubygems_version = "3.1.2".freeze + s.summary = "The best way to manage your application's dependencies".freeze + + s.installed_by_version = "3.1.2" if s.respond_to? :installed_by_version + end \ No newline at end of file From abdd00cd24eebff55bc3882edc343aab7badca07 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Tue, 6 Oct 2020 13:20:03 -0400 Subject: [PATCH 4/8] rename gem parsers and catalogers Signed-off-by: Alex Goodman --- .../bundler/{cataloger.go => catalogers.go} | 12 ++++---- syft/cataloger/bundler/parse_gemfile_lock.go | 6 ++-- .../bundler/parse_gemfile_lock_test.go | 2 +- syft/cataloger/bundler/parse_gemspec.go | 4 +-- syft/cataloger/bundler/parse_gemspec_test.go | 2 +- syft/cataloger/cataloger.go | 4 +-- syft/cataloger/common/generic_cataloger.go | 30 +++++++++---------- 7 files changed, 30 insertions(+), 30 deletions(-) rename syft/cataloger/bundler/{cataloger.go => catalogers.go} (62%) diff --git a/syft/cataloger/bundler/cataloger.go b/syft/cataloger/bundler/catalogers.go similarity index 62% rename from syft/cataloger/bundler/cataloger.go rename to syft/cataloger/bundler/catalogers.go index dcaced018..ae8554226 100644 --- a/syft/cataloger/bundler/cataloger.go +++ b/syft/cataloger/bundler/catalogers.go @@ -7,19 +7,19 @@ import ( "github.com/anchore/syft/syft/cataloger/common" ) -// NewGemfileLockCataloger returns a new Bundler cataloger object tailored for parsing index-oriented files (e.g. Gemfile.lock). -func NewGemfileLockCataloger() *common.GenericCataloger { +// NewGemFileLockCataloger returns a new Bundler cataloger object tailored for parsing index-oriented files (e.g. Gemfile.lock). +func NewGemFileLockCataloger() *common.GenericCataloger { globParsers := map[string]common.ParserFn{ - "**/Gemfile.lock": parseGemfileLockEntries, + "**/Gemfile.lock": parseGemFileLockEntries, } return common.NewGenericCataloger(nil, globParsers, "ruby-gemfile-cataloger") } -// NewGemspecCataloger returns a new Bundler cataloger object tailored for detecting installations of gems (e.g. Gemspec). -func NewGemspecCataloger() *common.GenericCataloger { +// NewGemSpecCataloger returns a new Bundler cataloger object tailored for detecting installations of gems (e.g. Gemspec). +func NewGemSpecCataloger() *common.GenericCataloger { globParsers := map[string]common.ParserFn{ - "**/specification/*.gemspec": parseGemspecEntries, + "**/specification/*.gemspec": parseGemSpecEntries, } return common.NewGenericCataloger(nil, globParsers, "ruby-gemspec-cataloger") diff --git a/syft/cataloger/bundler/parse_gemfile_lock.go b/syft/cataloger/bundler/parse_gemfile_lock.go index 76e8990e5..28a45eb76 100644 --- a/syft/cataloger/bundler/parse_gemfile_lock.go +++ b/syft/cataloger/bundler/parse_gemfile_lock.go @@ -11,12 +11,12 @@ import ( ) // integrity check -var _ common.ParserFn = parseGemfileLockEntries +var _ common.ParserFn = parseGemFileLockEntries var sectionsOfInterest = internal.NewStringSetFromSlice([]string{"GEM"}) -// parseGemfileLockEntries is a parser function for Gemfile.lock contents, returning all Gems discovered. -func parseGemfileLockEntries(_ string, reader io.Reader) ([]pkg.Package, error) { +// parseGemFileLockEntries is a parser function for Gemfile.lock contents, returning all Gems discovered. +func parseGemFileLockEntries(_ string, reader io.Reader) ([]pkg.Package, error) { pkgs := make([]pkg.Package, 0) scanner := bufio.NewScanner(reader) diff --git a/syft/cataloger/bundler/parse_gemfile_lock_test.go b/syft/cataloger/bundler/parse_gemfile_lock_test.go index 4e3faf7e5..bb0b254d0 100644 --- a/syft/cataloger/bundler/parse_gemfile_lock_test.go +++ b/syft/cataloger/bundler/parse_gemfile_lock_test.go @@ -68,7 +68,7 @@ func TestParseGemfileLockEntries(t *testing.T) { t.Fatalf("failed to open fixture: %+v", err) } - actual, err := parseGemfileLockEntries(fixture.Name(), fixture) + actual, err := parseGemFileLockEntries(fixture.Name(), fixture) if err != nil { t.Fatalf("failed to parse gemfile lock: %+v", err) } diff --git a/syft/cataloger/bundler/parse_gemspec.go b/syft/cataloger/bundler/parse_gemspec.go index 728668899..90ab8766e 100644 --- a/syft/cataloger/bundler/parse_gemspec.go +++ b/syft/cataloger/bundler/parse_gemspec.go @@ -14,7 +14,7 @@ import ( ) // integrity check -var _ common.ParserFn = parseGemfileLockEntries +var _ common.ParserFn = parseGemFileLockEntries // for line in gem.splitlines(): // line = line.strip() @@ -65,7 +65,7 @@ var postProcessors = map[string]listProcessor{ //}, } -func parseGemspecEntries(_ string, reader io.Reader) ([]pkg.Package, error) { +func parseGemSpecEntries(_ string, reader io.Reader) ([]pkg.Package, error) { var pkgs []pkg.Package var fields = make(map[string]interface{}) scanner := bufio.NewScanner(reader) diff --git a/syft/cataloger/bundler/parse_gemspec_test.go b/syft/cataloger/bundler/parse_gemspec_test.go index b792405b3..8889899d2 100644 --- a/syft/cataloger/bundler/parse_gemspec_test.go +++ b/syft/cataloger/bundler/parse_gemspec_test.go @@ -17,7 +17,7 @@ func TestParseGemspec(t *testing.T) { t.Fatalf("failed to open fixture: %+v", err) } - actual, err := parseGemspecEntries(fixture.Name(), fixture) + actual, err := parseGemSpecEntries(fixture.Name(), fixture) if err != nil { t.Fatalf("failed to parse gemspec: %+v", err) } diff --git a/syft/cataloger/cataloger.go b/syft/cataloger/cataloger.go index b4987f236..e366cea0e 100644 --- a/syft/cataloger/cataloger.go +++ b/syft/cataloger/cataloger.go @@ -36,7 +36,7 @@ type Cataloger interface { // ImageCatalogers returns a slice of locally implemented catalogers that are fit for detecting installations of packages. func ImageCatalogers() []Cataloger { return []Cataloger{ - bundler.NewGemspecCataloger(), + bundler.NewGemSpecCataloger(), python.NewPythonCataloger(), // TODO: split and replace me javascript.NewJavascriptCataloger(), // TODO: split and replace me deb.NewDpkgdbCataloger(), @@ -50,7 +50,7 @@ func ImageCatalogers() []Cataloger { // DirectoryCatalogers returns a slice of locally implemented catalogers that are fit for detecting packages from index files (and select installations) func DirectoryCatalogers() []Cataloger { return []Cataloger{ - bundler.NewGemfileLockCataloger(), + bundler.NewGemFileLockCataloger(), python.NewPythonCataloger(), // TODO: split and replace me javascript.NewJavascriptCataloger(), // TODO: split and replace me deb.NewDpkgdbCataloger(), diff --git a/syft/cataloger/common/generic_cataloger.go b/syft/cataloger/common/generic_cataloger.go index 1255f0794..ed314fcae 100644 --- a/syft/cataloger/common/generic_cataloger.go +++ b/syft/cataloger/common/generic_cataloger.go @@ -15,27 +15,27 @@ import ( // GenericCataloger implements the Catalog interface and is responsible for dispatching the proper parser function for // a given path or glob pattern. This is intended to be reusable across many package cataloger types. type GenericCataloger struct { - globParsers map[string]ParserFn - pathParsers map[string]ParserFn - selectedFiles []file.Reference - parsers map[file.Reference]ParserFn - upstreamMatcher string + globParsers map[string]ParserFn + pathParsers map[string]ParserFn + selectedFiles []file.Reference + parsers map[file.Reference]ParserFn + upstreamCataloger string } // NewGenericCataloger if provided path-to-parser-function and glob-to-parser-function lookups creates a GenericCataloger -func NewGenericCataloger(pathParsers map[string]ParserFn, globParsers map[string]ParserFn, upstreamMatcher string) *GenericCataloger { +func NewGenericCataloger(pathParsers map[string]ParserFn, globParsers map[string]ParserFn, upstreamCataloger string) *GenericCataloger { return &GenericCataloger{ - globParsers: globParsers, - pathParsers: pathParsers, - selectedFiles: make([]file.Reference, 0), - parsers: make(map[file.Reference]ParserFn), - upstreamMatcher: upstreamMatcher, + globParsers: globParsers, + pathParsers: pathParsers, + selectedFiles: make([]file.Reference, 0), + parsers: make(map[file.Reference]ParserFn), + upstreamCataloger: upstreamCataloger, } } // Name returns a string that uniquely describes the upstream cataloger that this Generic Cataloger represents. func (a *GenericCataloger) Name() string { - return a.upstreamMatcher + return a.upstreamCataloger } // register pairs a set of file references with a parser function for future cataloging (when the file contents are resolved) @@ -88,19 +88,19 @@ func (a *GenericCataloger) Catalog(contents map[file.Reference]string) ([]pkg.Pa for reference, parser := range a.parsers { content, ok := contents[reference] if !ok { - log.Errorf("cataloger '%s' missing file content: %+v", a.upstreamMatcher, reference) + log.Errorf("cataloger '%s' missing file content: %+v", a.upstreamCataloger, reference) continue } entries, err := parser(string(reference.Path), strings.NewReader(content)) if err != nil { // TODO: should we fail? or only log? - log.Errorf("cataloger '%s' failed to parse entries (reference=%+v): %+v", a.upstreamMatcher, reference, err) + log.Errorf("cataloger '%s' failed to parse entries (reference=%+v): %+v", a.upstreamCataloger, reference, err) continue } for _, entry := range entries { - entry.FoundBy = a.upstreamMatcher + entry.FoundBy = a.upstreamCataloger entry.Source = []file.Reference{reference} packages = append(packages, entry) From 1f0f6fa3e5fc6e48e448ed8e3d7fdf504e94c1f8 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Tue, 6 Oct 2020 14:02:04 -0400 Subject: [PATCH 5/8] more gemspec tests Signed-off-by: Alex Goodman --- syft/cataloger/bundler/parse_gemspec.go | 70 +++++++++---------- syft/cataloger/bundler/parse_gemspec_test.go | 39 +++++------ .../bundler/test-fixtures/bundler.gemspec | 49 ++++++------- syft/pkg/gem_metadata.go | 8 ++- 4 files changed, 80 insertions(+), 86 deletions(-) diff --git a/syft/cataloger/bundler/parse_gemspec.go b/syft/cataloger/bundler/parse_gemspec.go index 90ab8766e..00a3f5c70 100644 --- a/syft/cataloger/bundler/parse_gemspec.go +++ b/syft/cataloger/bundler/parse_gemspec.go @@ -16,53 +16,42 @@ import ( // integrity check var _ common.ParserFn = parseGemFileLockEntries -// for line in gem.splitlines(): -// line = line.strip() -// line = re.sub(r"\.freeze", "", line) - -// # look for the unicode \u{} format and try to convert to something python can use -// patt = re.match(r".*\.homepage *= *(.*) *", line) -// if patt: -// sourcepkg = json.loads(patt.group(1)) - -// patt = re.match(r".*\.licenses *= *(.*) *", line) -// if patt: -// lstr = re.sub(r"^\[|\]$", "", patt.group(1)).split(',') -// for thestr in lstr: -// thestr = re.sub(' *" *', "", thestr) -// lics.append(thestr) - -// patt = re.match(r".*\.authors *= *(.*) *", line) -// if patt: -// lstr = re.sub(r"^\[|\]$", "", patt.group(1)).split(',') -// for thestr in lstr: -// thestr = re.sub(' *" *', "", thestr) -// origins.append(thestr) - -// patt = re.match(r".*\.files *= *(.*) *", line) -// if patt: -// lstr = re.sub(r"^\[|\]$", "", patt.group(1)).split(',') -// for thestr in lstr: -// thestr = re.sub(' *" *', "", thestr) -// rfiles.append(thestr) - -type listProcessor func(string) []string +type postProcessor func(string) []string var patterns = map[string]*regexp.Regexp{ // match example: name = "railties".freeze ---> railties "name": regexp.MustCompile(`.*\.name\s*=\s*["']{1}(?P.*)["']{1} *`), + // match example: version = "1.0.4".freeze ---> 1.0.4 "version": regexp.MustCompile(`.*\.version\s*=\s*["']{1}(?P.*)["']{1} *`), - // match example: homepage = "https://github.com/anchore/syft".freeze ---> https://github.com/anchore/syft + + // match example: + // homepage = "https://github.com/anchore/syft".freeze ---> https://github.com/anchore/syft "homepage": regexp.MustCompile(`.*\.homepage\s*=\s*["']{1}(?P.*)["']{1} *`), - // TODO: add more fields + + // match example: files = ["exe/bundle".freeze, "exe/bundler".freeze] ---> "exe/bundle".freeze, "exe/bundler".freeze + "files": regexp.MustCompile(`.*\.files\s*=\s*\[(?P.*)\] *`), + + // match example: authors = ["Andr\u00E9 Arko".freeze, "Samuel Giddins".freeze, "Colby Swandale".freeze, + // "Hiroshi Shibata".freeze, "David Rodr\u00EDguez".freeze, "Grey Baker".freeze...] + "authors": regexp.MustCompile(`.*\.authors\s*=\s*\[(?P.*)\] *`), + + // match example: licenses = ["MIT".freeze] ----> "MIT".freeze + "licenses": regexp.MustCompile(`.*\.licenses\s*=\s*\[(?P.*)\] *`), } -// TODO: use post processors for lists -var postProcessors = map[string]listProcessor{ - //"files": func(s string) []string { - // - //}, +var postProcessors = map[string]postProcessor{ + "files": processList, + "authors": processList, + "licenses": processList, +} + +func processList(s string) []string { + var results []string + for _, item := range strings.Split(s, ",") { + results = append(results, strings.Trim(item, "\" ")) + } + return results } func parseGemSpecEntries(_ string, reader io.Reader) ([]pkg.Package, error) { @@ -75,12 +64,16 @@ func parseGemSpecEntries(_ string, reader io.Reader) ([]pkg.Package, error) { // TODO: sanitize unicode? (see engine code) sanitizedLine := strings.TrimSpace(line) + sanitizedLine = strings.ReplaceAll(sanitizedLine, ".freeze", "") if sanitizedLine == "" { continue } for field, pattern := range patterns { + if strings.Contains(sanitizedLine, "licenses") { + println("Found it.") + } matchMap := matchCaptureGroups(pattern, sanitizedLine) if value := matchMap[field]; value != "" { if postProcessor := postProcessors[field]; postProcessor != nil { @@ -103,6 +96,7 @@ func parseGemSpecEntries(_ string, reader io.Reader) ([]pkg.Package, error) { pkgs = append(pkgs, pkg.Package{ Name: metadata.Name, Version: metadata.Version, + Licenses: metadata.Licenses, Language: pkg.Ruby, Type: pkg.GemPkg, Metadata: metadata, diff --git a/syft/cataloger/bundler/parse_gemspec_test.go b/syft/cataloger/bundler/parse_gemspec_test.go index 8889899d2..11d1276f1 100644 --- a/syft/cataloger/bundler/parse_gemspec_test.go +++ b/syft/cataloger/bundler/parse_gemspec_test.go @@ -5,11 +5,23 @@ import ( "testing" "github.com/anchore/syft/syft/pkg" + "github.com/go-test/deep" ) func TestParseGemspec(t *testing.T) { - var expectedGems = map[string]string{ - "bundler": "2.1.4", + var expectedPkg = pkg.Package{ + Name: "bundler", + Version: "2.1.4", + Type: pkg.GemPkg, + Licenses: []string{"MIT"}, + Language: pkg.Ruby, + Metadata: pkg.GemMetadata{ + Name: "bundler", + Version: "2.1.4", + Files: []string{"exe/bundle", "exe/bundler"}, + Authors: []string{"André Arko", "Samuel Giddins", "Colby Swandale", "Hiroshi Shibata", "David Rodréguez", "Grey Baker", "Stephanie Morillo", "Chris Morris", "James Wen", "Tim Moore", "André Medeiros", "Jessica Lynn Suttles", "Terence Lee", "Carl Lerche", "Yehuda Katz"}, + Licenses: []string{"MIT"}, + }, } fixture, err := os.Open("test-fixtures/bundler.gemspec") @@ -22,29 +34,14 @@ func TestParseGemspec(t *testing.T) { t.Fatalf("failed to parse gemspec: %+v", err) } - if len(actual) != len(expectedGems) { + if len(actual) != 1 { for _, a := range actual { t.Log(" ", a) } - t.Fatalf("unexpected package count: %d!=%d", len(actual), len(expectedGems)) + t.Fatalf("unexpected package count: %d!=1", len(actual)) } - for _, a := range actual { - expectedVersion, ok := expectedGems[a.Name] - if !ok { - t.Errorf("unexpected package found: %s", a.Name) - } - - if expectedVersion != a.Version { - t.Errorf("unexpected package version (pkg=%s): %s", a.Name, a.Version) - } - - if a.Language != pkg.Ruby { - t.Errorf("bad language (pkg=%+v): %+v", a.Name, a.Language) - } - - if a.Type != pkg.GemPkg { - t.Errorf("bad package type (pkg=%+v): %+v", a.Name, a.Type) - } + for _, d := range deep.Equal(actual[0], expectedPkg) { + t.Errorf("diff: %+v", d) } } diff --git a/syft/cataloger/bundler/test-fixtures/bundler.gemspec b/syft/cataloger/bundler/test-fixtures/bundler.gemspec index 450b81096..a877840b3 100644 --- a/syft/cataloger/bundler/test-fixtures/bundler.gemspec +++ b/syft/cataloger/bundler/test-fixtures/bundler.gemspec @@ -1,25 +1,26 @@ # frozen_string_literal: true -# -*- encoding: utf-8 -*- -# stub: bundler 2.1.4 ruby lib - -Gem::Specification.new do |s| - s.name = "bundler".freeze - s.version = "2.1.4" - - s.required_rubygems_version = Gem::Requirement.new(">= 2.5.2".freeze) if s.respond_to? :required_rubygems_version= - s.require_paths = ["lib".freeze] - s.authors = ["Andr\u00E9 Arko".freeze, "Samuel Giddins".freeze, "Colby Swandale".freeze, "Hiroshi Shibata".freeze, "David Rodr\u00EDguez".freeze, "Grey Baker".f - s.bindir = "exe".freeze - s.date = "2020-01-05" - s.description = "Bundler manages an application's dependencies through its entire life, across many machines, systematically and repeatably".freeze - s.email = ["team@bundler.io".freeze] - s.executables = ["bundle".freeze, "bundler".freeze] - s.files = ["exe/bundle".freeze, "exe/bundler".freeze] - s.homepage = "https://bundler.io".freeze - s.licenses = ["MIT".freeze] - s.required_ruby_version = Gem::Requirement.new(">= 2.3.0".freeze) - s.rubygems_version = "3.1.2".freeze - s.summary = "The best way to manage your application's dependencies".freeze - - s.installed_by_version = "3.1.2" if s.respond_to? :installed_by_version - end \ No newline at end of file +# -*- encoding: utf-8 -*- +# stub: bundler 2.1.4 ruby lib + +Gem::Specification.new do |s| + s.name = "bundler".freeze + s.version = "2.1.4" + + s.required_rubygems_version = Gem::Requirement.new(">= 2.5.2".freeze) if s.respond_to? :required_rubygems_version= + s.metadata = { "bug_tracker_uri" => "https://github.com/bundler/bundler/issues", "changelog_uri" => "https://github.com/bundler/bundler/blob/master/CHANGELOG.md", "homepage_uri" => "https://bundler.io/", "source_code_uri" => "https://github.com/bundler/bundler/" } if s.respond_to? :metadata= + s.require_paths = ["lib".freeze] + s.authors = ["Andr\u00E9 Arko".freeze, "Samuel Giddins".freeze, "Colby Swandale".freeze, "Hiroshi Shibata".freeze, "David Rodr\u00EDguez".freeze, "Grey Baker".freeze, "Stephanie Morillo".freeze, "Chris Morris".freeze, "James Wen".freeze, "Tim Moore".freeze, "Andr\u00E9 Medeiros".freeze, "Jessica Lynn Suttles".freeze, "Terence Lee".freeze, "Carl Lerche".freeze, "Yehuda Katz".freeze] + s.bindir = "exe".freeze + s.date = "2020-01-05" + s.description = "Bundler manages an application's dependencies through its entire life, across many machines, systematically and repeatably".freeze + s.email = ["team@bundler.io".freeze] + s.executables = ["bundle".freeze, "bundler".freeze] + s.files = ["exe/bundle".freeze, "exe/bundler".freeze] + s.homepage = "https://bundler.io".freeze + s.licenses = ["MIT".freeze] + s.required_ruby_version = Gem::Requirement.new(">= 2.3.0".freeze) + s.rubygems_version = "3.1.2".freeze + s.summary = "The best way to manage your application's dependencies".freeze + + s.installed_by_version = "3.1.2" if s.respond_to? :installed_by_version +end \ No newline at end of file diff --git a/syft/pkg/gem_metadata.go b/syft/pkg/gem_metadata.go index 164f6b007..aa2edc310 100644 --- a/syft/pkg/gem_metadata.go +++ b/syft/pkg/gem_metadata.go @@ -1,7 +1,9 @@ package pkg type GemMetadata struct { - Name string `mapstructure:"name" json:"name"` - Version string `mapstructure:"version" json:"version"` - // TODO: add more fields from the gemspec + Name string `mapstructure:"name" json:"name"` + Version string `mapstructure:"version" json:"version"` + Files []string `mapstructure:"files" json:"files"` + Authors []string `mapstructure:"authors" json:"authors"` + Licenses []string `mapstructure:"licenses" json:"licenses"` } From 46c74865e5175569fd69428e6624f08d43396d28 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Wed, 7 Oct 2020 11:19:29 -0400 Subject: [PATCH 6/8] add render unicode in gemspec parser Signed-off-by: Alex Goodman --- syft/cataloger/bundler/parse_gemspec.go | 49 ++++++++++++++++++-- syft/cataloger/bundler/parse_gemspec_test.go | 6 +-- 2 files changed, 48 insertions(+), 7 deletions(-) diff --git a/syft/cataloger/bundler/parse_gemspec.go b/syft/cataloger/bundler/parse_gemspec.go index 00a3f5c70..11fa6c059 100644 --- a/syft/cataloger/bundler/parse_gemspec.go +++ b/syft/cataloger/bundler/parse_gemspec.go @@ -5,6 +5,7 @@ import ( "fmt" "io" "regexp" + "strconv" "strings" "github.com/mitchellh/mapstructure" @@ -62,18 +63,15 @@ func parseGemSpecEntries(_ string, reader io.Reader) ([]pkg.Package, error) { for scanner.Scan() { line := scanner.Text() - // TODO: sanitize unicode? (see engine code) sanitizedLine := strings.TrimSpace(line) sanitizedLine = strings.ReplaceAll(sanitizedLine, ".freeze", "") + sanitizedLine = renderUtf8(sanitizedLine) if sanitizedLine == "" { continue } for field, pattern := range patterns { - if strings.Contains(sanitizedLine, "licenses") { - println("Found it.") - } matchMap := matchCaptureGroups(pattern, sanitizedLine) if value := matchMap[field]; value != "" { if postProcessor := postProcessors[field]; postProcessor != nil { @@ -106,6 +104,49 @@ func parseGemSpecEntries(_ string, reader io.Reader) ([]pkg.Package, error) { return pkgs, nil } +// renderUtf8 takes any string escaped string sub-sections from the ruby string and replaces those sections with the UTF8 runes. +func renderUtf8(s string) string { + pattern := regexp.MustCompile(`\\u(?P[0-9A-F]{4,8})`) + fullReplacement := replaceAllStringSubmatchFunc(pattern, s, func(unicodeSection []string) string { + replacement := "" + if len(unicodeSection) == 1 { + return unicodeSection[0] + } + for idx, m := range unicodeSection { + if idx == 0 { + continue + } + value, err := strconv.ParseInt(m, 16, 64) + if err != nil { + // TODO: log? + panic(err) + //return unicodeSection[0] + } + replacement = strings.ReplaceAll(unicodeSection[0], "\\u"+m, string(rune(value))) + } + return replacement + }) + return fullReplacement +} + +// replaceAllStringSubmatchFunc finds and replaces the given capture groups from the +func replaceAllStringSubmatchFunc(re *regexp.Regexp, str string, repl func([]string) string) string { + result := "" + lastIndex := 0 + + for _, v := range re.FindAllSubmatchIndex([]byte(str), -1) { + var groups []string + for i := 0; i < len(v); i += 2 { + groups = append(groups, str[v[i]:v[i+1]]) + } + + result += str[lastIndex:v[0]] + repl(groups) + lastIndex = v[1] + } + + return result + str[lastIndex:] +} + // matchCaptureGroups takes a regular expression and string and returns all of the named capture group results in a map. func matchCaptureGroups(regEx *regexp.Regexp, str string) map[string]string { match := regEx.FindStringSubmatch(str) diff --git a/syft/cataloger/bundler/parse_gemspec_test.go b/syft/cataloger/bundler/parse_gemspec_test.go index 11d1276f1..539372b53 100644 --- a/syft/cataloger/bundler/parse_gemspec_test.go +++ b/syft/cataloger/bundler/parse_gemspec_test.go @@ -16,9 +16,9 @@ func TestParseGemspec(t *testing.T) { Licenses: []string{"MIT"}, Language: pkg.Ruby, Metadata: pkg.GemMetadata{ - Name: "bundler", - Version: "2.1.4", - Files: []string{"exe/bundle", "exe/bundler"}, + Name: "bundler", + Version: "2.1.4", + Files: []string{"exe/bundle", "exe/bundler"}, Authors: []string{"André Arko", "Samuel Giddins", "Colby Swandale", "Hiroshi Shibata", "David Rodréguez", "Grey Baker", "Stephanie Morillo", "Chris Morris", "James Wen", "Tim Moore", "André Medeiros", "Jessica Lynn Suttles", "Terence Lee", "Carl Lerche", "Yehuda Katz"}, Licenses: []string{"MIT"}, }, From 398d8903e7f7c2f8f2d982cbe42c7e48a7bc21aa Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Wed, 7 Oct 2020 11:52:13 -0400 Subject: [PATCH 7/8] finalize gemspec parser + update json schema to match Signed-off-by: Alex Goodman --- schema/json/schema.json | 60 ++++++++++++------- syft/cataloger/cataloger.go | 6 +- .../cataloger/{bundler => ruby}/catalogers.go | 2 +- .../{bundler => ruby}/parse_gemfile_lock.go | 2 +- .../parse_gemfile_lock_test.go | 2 +- .../{bundler => ruby}/parse_gemspec.go | 47 ++++----------- .../{bundler => ruby}/parse_gemspec_test.go | 4 +- .../test-fixtures/Gemfile.lock | 0 .../test-fixtures/bundler.gemspec | 0 syft/pkg/gem_metadata.go | 6 +- 10 files changed, 59 insertions(+), 70 deletions(-) rename syft/cataloger/{bundler => ruby}/catalogers.go (98%) rename syft/cataloger/{bundler => ruby}/parse_gemfile_lock.go (98%) rename syft/cataloger/{bundler => ruby}/parse_gemfile_lock_test.go (99%) rename syft/cataloger/{bundler => ruby}/parse_gemspec.go (77%) rename syft/cataloger/{bundler => ruby}/parse_gemspec_test.go (93%) rename syft/cataloger/{bundler => ruby}/test-fixtures/Gemfile.lock (100%) rename syft/cataloger/{bundler => ruby}/test-fixtures/bundler.gemspec (100%) diff --git a/schema/json/schema.json b/schema/json/schema.json index 6bc3dfb47..be4cb2e5f 100644 --- a/schema/json/schema.json +++ b/schema/json/schema.json @@ -40,6 +40,9 @@ "architecture": { "type": "string" }, + "authors": { + "type": "null" + }, "description": { "type": "string" }, @@ -48,31 +51,38 @@ }, "files": { "items": { - "properties": { - "checksum": { + "anyOf": [ + { "type": "string" }, - "ownerGid": { - "type": "string" - }, - "ownerUid": { - "type": "string" - }, - "path": { - "type": "string" - }, - "permissions": { - "type": "string" + { + "properties": { + "checksum": { + "type": "string" + }, + "ownerGid": { + "type": "string" + }, + "ownerUid": { + "type": "string" + }, + "path": { + "type": "string" + }, + "permissions": { + "type": "string" + } + }, + "required": [ + "checksum", + "ownerGid", + "ownerUid", + "path", + "permissions" + ], + "type": "object" } - }, - "required": [ - "checksum", - "ownerGid", - "ownerUid", - "path", - "permissions" - ], - "type": "object" + ] }, "type": "array" }, @@ -85,6 +95,12 @@ "license": { "type": "string" }, + "licenses": { + "items": { + "type": "string" + }, + "type": "array" + }, "maintainer": { "type": "string" }, diff --git a/syft/cataloger/cataloger.go b/syft/cataloger/cataloger.go index e366cea0e..85e0f77bc 100644 --- a/syft/cataloger/cataloger.go +++ b/syft/cataloger/cataloger.go @@ -8,13 +8,13 @@ package cataloger import ( "github.com/anchore/stereoscope/pkg/file" "github.com/anchore/syft/syft/cataloger/apkdb" - "github.com/anchore/syft/syft/cataloger/bundler" "github.com/anchore/syft/syft/cataloger/deb" "github.com/anchore/syft/syft/cataloger/golang" "github.com/anchore/syft/syft/cataloger/java" "github.com/anchore/syft/syft/cataloger/javascript" "github.com/anchore/syft/syft/cataloger/python" "github.com/anchore/syft/syft/cataloger/rpmdb" + "github.com/anchore/syft/syft/cataloger/ruby" "github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/scope" ) @@ -36,7 +36,7 @@ type Cataloger interface { // ImageCatalogers returns a slice of locally implemented catalogers that are fit for detecting installations of packages. func ImageCatalogers() []Cataloger { return []Cataloger{ - bundler.NewGemSpecCataloger(), + ruby.NewGemSpecCataloger(), python.NewPythonCataloger(), // TODO: split and replace me javascript.NewJavascriptCataloger(), // TODO: split and replace me deb.NewDpkgdbCataloger(), @@ -50,7 +50,7 @@ func ImageCatalogers() []Cataloger { // DirectoryCatalogers returns a slice of locally implemented catalogers that are fit for detecting packages from index files (and select installations) func DirectoryCatalogers() []Cataloger { return []Cataloger{ - bundler.NewGemFileLockCataloger(), + ruby.NewGemFileLockCataloger(), python.NewPythonCataloger(), // TODO: split and replace me javascript.NewJavascriptCataloger(), // TODO: split and replace me deb.NewDpkgdbCataloger(), diff --git a/syft/cataloger/bundler/catalogers.go b/syft/cataloger/ruby/catalogers.go similarity index 98% rename from syft/cataloger/bundler/catalogers.go rename to syft/cataloger/ruby/catalogers.go index ae8554226..960cbf720 100644 --- a/syft/cataloger/bundler/catalogers.go +++ b/syft/cataloger/ruby/catalogers.go @@ -1,7 +1,7 @@ /* Package bundler provides a concrete Cataloger implementation for Ruby Gemfile.lock bundler files. */ -package bundler +package ruby import ( "github.com/anchore/syft/syft/cataloger/common" diff --git a/syft/cataloger/bundler/parse_gemfile_lock.go b/syft/cataloger/ruby/parse_gemfile_lock.go similarity index 98% rename from syft/cataloger/bundler/parse_gemfile_lock.go rename to syft/cataloger/ruby/parse_gemfile_lock.go index 28a45eb76..23a4a7756 100644 --- a/syft/cataloger/bundler/parse_gemfile_lock.go +++ b/syft/cataloger/ruby/parse_gemfile_lock.go @@ -1,4 +1,4 @@ -package bundler +package ruby import ( "bufio" diff --git a/syft/cataloger/bundler/parse_gemfile_lock_test.go b/syft/cataloger/ruby/parse_gemfile_lock_test.go similarity index 99% rename from syft/cataloger/bundler/parse_gemfile_lock_test.go rename to syft/cataloger/ruby/parse_gemfile_lock_test.go index bb0b254d0..4307c34fa 100644 --- a/syft/cataloger/bundler/parse_gemfile_lock_test.go +++ b/syft/cataloger/ruby/parse_gemfile_lock_test.go @@ -1,4 +1,4 @@ -package bundler +package ruby import ( "os" diff --git a/syft/cataloger/bundler/parse_gemspec.go b/syft/cataloger/ruby/parse_gemspec.go similarity index 77% rename from syft/cataloger/bundler/parse_gemspec.go rename to syft/cataloger/ruby/parse_gemspec.go index 11fa6c059..ad22dc9c8 100644 --- a/syft/cataloger/bundler/parse_gemspec.go +++ b/syft/cataloger/ruby/parse_gemspec.go @@ -1,11 +1,11 @@ -package bundler +package ruby import ( "bufio" + "encoding/json" "fmt" "io" "regexp" - "strconv" "strings" "github.com/mitchellh/mapstructure" @@ -48,6 +48,7 @@ var postProcessors = map[string]postProcessor{ } func processList(s string) []string { + // nolint:prealloc var results []string for _, item := range strings.Split(s, ",") { results = append(results, strings.Trim(item, "\" ")) @@ -106,47 +107,19 @@ func parseGemSpecEntries(_ string, reader io.Reader) ([]pkg.Package, error) { // renderUtf8 takes any string escaped string sub-sections from the ruby string and replaces those sections with the UTF8 runes. func renderUtf8(s string) string { - pattern := regexp.MustCompile(`\\u(?P[0-9A-F]{4,8})`) - fullReplacement := replaceAllStringSubmatchFunc(pattern, s, func(unicodeSection []string) string { - replacement := "" - if len(unicodeSection) == 1 { - return unicodeSection[0] - } - for idx, m := range unicodeSection { - if idx == 0 { - continue - } - value, err := strconv.ParseInt(m, 16, 64) - if err != nil { - // TODO: log? - panic(err) - //return unicodeSection[0] - } - replacement = strings.ReplaceAll(unicodeSection[0], "\\u"+m, string(rune(value))) + pattern := regexp.MustCompile(`\\u(?P[0-9A-F]{4})`) + fullReplacement := pattern.ReplaceAllStringFunc(s, func(unicodeSection string) string { + var replacement string + // note: the json parser already has support for interpreting hex-representations of unicode escaped strings as unicode runes. + // we can do this ourselves with strconv.Atoi, or leverage the existing json package. + if err := json.Unmarshal([]byte(`"`+unicodeSection+`"`), &replacement); err != nil { + return unicodeSection } return replacement }) return fullReplacement } -// replaceAllStringSubmatchFunc finds and replaces the given capture groups from the -func replaceAllStringSubmatchFunc(re *regexp.Regexp, str string, repl func([]string) string) string { - result := "" - lastIndex := 0 - - for _, v := range re.FindAllSubmatchIndex([]byte(str), -1) { - var groups []string - for i := 0; i < len(v); i += 2 { - groups = append(groups, str[v[i]:v[i+1]]) - } - - result += str[lastIndex:v[0]] + repl(groups) - lastIndex = v[1] - } - - return result + str[lastIndex:] -} - // matchCaptureGroups takes a regular expression and string and returns all of the named capture group results in a map. func matchCaptureGroups(regEx *regexp.Regexp, str string) map[string]string { match := regEx.FindStringSubmatch(str) diff --git a/syft/cataloger/bundler/parse_gemspec_test.go b/syft/cataloger/ruby/parse_gemspec_test.go similarity index 93% rename from syft/cataloger/bundler/parse_gemspec_test.go rename to syft/cataloger/ruby/parse_gemspec_test.go index 539372b53..2c0cae20c 100644 --- a/syft/cataloger/bundler/parse_gemspec_test.go +++ b/syft/cataloger/ruby/parse_gemspec_test.go @@ -1,4 +1,4 @@ -package bundler +package ruby import ( "os" @@ -19,7 +19,7 @@ func TestParseGemspec(t *testing.T) { Name: "bundler", Version: "2.1.4", Files: []string{"exe/bundle", "exe/bundler"}, - Authors: []string{"André Arko", "Samuel Giddins", "Colby Swandale", "Hiroshi Shibata", "David Rodréguez", "Grey Baker", "Stephanie Morillo", "Chris Morris", "James Wen", "Tim Moore", "André Medeiros", "Jessica Lynn Suttles", "Terence Lee", "Carl Lerche", "Yehuda Katz"}, + Authors: []string{"André Arko", "Samuel Giddins", "Colby Swandale", "Hiroshi Shibata", "David Rodríguez", "Grey Baker", "Stephanie Morillo", "Chris Morris", "James Wen", "Tim Moore", "André Medeiros", "Jessica Lynn Suttles", "Terence Lee", "Carl Lerche", "Yehuda Katz"}, Licenses: []string{"MIT"}, }, } diff --git a/syft/cataloger/bundler/test-fixtures/Gemfile.lock b/syft/cataloger/ruby/test-fixtures/Gemfile.lock similarity index 100% rename from syft/cataloger/bundler/test-fixtures/Gemfile.lock rename to syft/cataloger/ruby/test-fixtures/Gemfile.lock diff --git a/syft/cataloger/bundler/test-fixtures/bundler.gemspec b/syft/cataloger/ruby/test-fixtures/bundler.gemspec similarity index 100% rename from syft/cataloger/bundler/test-fixtures/bundler.gemspec rename to syft/cataloger/ruby/test-fixtures/bundler.gemspec diff --git a/syft/pkg/gem_metadata.go b/syft/pkg/gem_metadata.go index aa2edc310..26f5ae0f4 100644 --- a/syft/pkg/gem_metadata.go +++ b/syft/pkg/gem_metadata.go @@ -1,9 +1,9 @@ package pkg type GemMetadata struct { - Name string `mapstructure:"name" json:"name"` - Version string `mapstructure:"version" json:"version"` - Files []string `mapstructure:"files" json:"files"` + Name string `mapstructure:"name" json:"name"` + Version string `mapstructure:"version" json:"version"` + Files []string `mapstructure:"files" json:"files"` Authors []string `mapstructure:"authors" json:"authors"` Licenses []string `mapstructure:"licenses" json:"licenses"` } From 9edbc65bcecbf884156a84ccf88755c7229a031c Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Thu, 8 Oct 2020 10:55:57 -0400 Subject: [PATCH 8/8] move unicode regex to static space Signed-off-by: Alex Goodman --- syft/cataloger/ruby/parse_gemspec.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/syft/cataloger/ruby/parse_gemspec.go b/syft/cataloger/ruby/parse_gemspec.go index ad22dc9c8..7fe310265 100644 --- a/syft/cataloger/ruby/parse_gemspec.go +++ b/syft/cataloger/ruby/parse_gemspec.go @@ -19,6 +19,9 @@ var _ common.ParserFn = parseGemFileLockEntries type postProcessor func(string) []string +// match example: Al\u003Ex ---> 003E +var unicodePattern = regexp.MustCompile(`\\u(?P[0-9A-F]{4})`) + var patterns = map[string]*regexp.Regexp{ // match example: name = "railties".freeze ---> railties "name": regexp.MustCompile(`.*\.name\s*=\s*["']{1}(?P.*)["']{1} *`), @@ -107,8 +110,7 @@ func parseGemSpecEntries(_ string, reader io.Reader) ([]pkg.Package, error) { // renderUtf8 takes any string escaped string sub-sections from the ruby string and replaces those sections with the UTF8 runes. func renderUtf8(s string) string { - pattern := regexp.MustCompile(`\\u(?P[0-9A-F]{4})`) - fullReplacement := pattern.ReplaceAllStringFunc(s, func(unicodeSection string) string { + fullReplacement := unicodePattern.ReplaceAllStringFunc(s, func(unicodeSection string) string { var replacement string // note: the json parser already has support for interpreting hex-representations of unicode escaped strings as unicode runes. // we can do this ourselves with strconv.Atoi, or leverage the existing json package.