Guess unpinned versions in python requirements.txt (#1966)

* feat: python requirements.txt parsing inclusive

Signed-off-by: manifestori <ori@manifestcyber.com>

* refactor: parseVersion

Signed-off-by: manifestori <ori@manifestcyber.com>

* add python config for optional requirements version constraint resolution

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* fix tests

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* allow for python requirements metadata to be optional

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* restore cyclonedx dependency

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

---------

Signed-off-by: manifestori <ori@manifestcyber.com>
Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
Co-authored-by: manifestori <ori@manifestcyber.com>
This commit is contained in:
Alex Goodman 2023-07-27 14:26:59 -04:00 committed by GitHub
parent bf1102c3f1
commit 063e9da65d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 474 additions and 147 deletions

View file

@ -577,6 +577,13 @@ linux-kernel:
# SYFT_LINUX_KERNEL_CATALOG_MODULES env var
catalog-modules: true
python:
# when running across entries in requirements.txt that do not pin an exact version
# (e.g. "sqlalchemy >= 1.0.0, <= 2.0.0, != 3.0.0, <= 3.0.0"), attempt to guess what the version could
# be based on the version requirements specified (e.g. "2.0.0"). When enabled, the guess is taken from the
# inclusive bounds named in the constraint and is used as-is (even if that version may not be available/published).
guess-unpinned-requirements: false
# cataloging file contents is exposed through the power-user subcommand
file-contents:
cataloger:

2
go.mod
View file

@ -55,6 +55,7 @@ require (
github.com/anchore/clio v0.0.0-20230602170917-e747e60c4aa0
github.com/anchore/go-logger v0.0.0-20230531193951-db5ae83e7dbe
github.com/anchore/stereoscope v0.0.0-20230724160817-d515761c6ca2
github.com/aquasecurity/go-pep440-version v0.0.0-20210121094942-22b2f8951d46
github.com/charmbracelet/bubbletea v0.24.2
github.com/charmbracelet/lipgloss v0.7.1
github.com/dave/jennifer v1.6.1
@ -90,6 +91,7 @@ require (
github.com/acomagu/bufpipe v1.0.4 // indirect
github.com/anchore/fangs v0.0.0-20230531202914-48a718c6b4ba // indirect
github.com/anchore/go-struct-converter v0.0.0-20221118182256-c68fdcfa2092 // indirect
github.com/aquasecurity/go-version v0.0.0-20210121072130-637058cfe492 // indirect
github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect
github.com/becheran/wildmatch-go v1.0.0 // indirect
github.com/charmbracelet/bubbles v0.16.1 // indirect

8
go.sum
View file

@ -114,6 +114,10 @@ github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY
github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be h1:9AeTilPcZAjCFIImctFaOjnTIavg87rW78vTPkQqLI8=
github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY=
github.com/aquasecurity/go-pep440-version v0.0.0-20210121094942-22b2f8951d46 h1:vmXNl+HDfqqXgr0uY1UgK1GAhps8nbAAtqHNBcgyf+4=
github.com/aquasecurity/go-pep440-version v0.0.0-20210121094942-22b2f8951d46/go.mod h1:olhPNdiiAAMiSujemd1O/sc6GcyePr23f/6uGKtthNg=
github.com/aquasecurity/go-version v0.0.0-20210121072130-637058cfe492 h1:rcEG5HI490FF0a7zuvxOxen52ddygCfNVjP0XOCMl+M=
github.com/aquasecurity/go-version v0.0.0-20210121072130-637058cfe492/go.mod h1:9Beu8XsUNNfzml7WBf3QmyPToP1wm1Gj/Vc5UJKqTzU=
github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o=
github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY=
github.com/armon/go-metrics v0.3.10/go.mod h1:4O98XIr/9W0sxpJ8UaYkvjk10Iff7SnFrb4QAOwNTFc=
@ -171,6 +175,7 @@ github.com/containerd/stargz-snapshotter/estargz v0.14.3 h1:OqlDCK3ZVUO6C3B/5FSk
github.com/containerd/stargz-snapshotter/estargz v0.14.3/go.mod h1:KY//uOCIkSuNAHhJogcZtrNHdKrA99/FCCRjE3HD36o=
github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk=
github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
github.com/cpuguy83/go-md2man/v2 v2.0.1/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
@ -585,6 +590,7 @@ github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6L
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts=
github.com/saferwall/pe v1.4.4 h1:Ml++7/2/Z1iKwV4zCsd1nIqTEAdUQKAetwbbcCarhOg=
@ -602,6 +608,7 @@ github.com/sergi/go-diff v1.3.1 h1:xkr+Oxo4BOQKmkn/B9eMK0g5Kg/983T9DqqPHwYqD+8=
github.com/sergi/go-diff v1.3.1/go.mod h1:aMJSSKb2lpPvRNec0+w3fl7LP9IOFzdc9Pa4NFbPK1I=
github.com/shopspring/decimal v1.2.0 h1:abSATXmQEYyShuxI4/vyW3tV1MrKAJzCZ/0zLUXYbsQ=
github.com/shopspring/decimal v1.2.0/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o=
github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
@ -675,6 +682,7 @@ github.com/ulikunitz/xz v0.5.9/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oW
github.com/ulikunitz/xz v0.5.10 h1:t92gobL9l3HE202wg3rlk19F6X+JOxl9BBrCCMYEYd8=
github.com/ulikunitz/xz v0.5.10/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
github.com/urfave/cli v1.22.12/go.mod h1:sSBEIC79qR6OvcmsD4U3KABeOTxDqQtdDnaFuUN30b8=
github.com/urfave/cli/v2 v2.3.0/go.mod h1:LJmUH05zAU44vOAcrfzZQKsZbVcdbOG8rtL3/XcUArI=
github.com/vbatts/go-mtree v0.5.3 h1:S/jYlfG8rZ+a0bhZd+RANXejy7M4Js8fq9U+XoWTd5w=
github.com/vbatts/go-mtree v0.5.3/go.mod h1:eXsdoPMdL2jcJx6HweWi9lYQxBsTp4lNhqqAjgkZUg8=
github.com/vbatts/tar-split v0.11.3 h1:hLFqsOLQ1SsppQNTMpkpPXClLDfC2A3Zgy9OUU+RVck=

View file

@ -20,6 +20,7 @@ import (
"github.com/anchore/syft/syft/pkg/cataloger"
golangCataloger "github.com/anchore/syft/syft/pkg/cataloger/golang"
"github.com/anchore/syft/syft/pkg/cataloger/kernel"
pythonCataloger "github.com/anchore/syft/syft/pkg/cataloger/python"
)
var (
@ -52,6 +53,7 @@ type Application struct {
Package pkg `yaml:"package" json:"package" mapstructure:"package"`
Golang golang `yaml:"golang" json:"golang" mapstructure:"golang"`
LinuxKernel linuxKernel `yaml:"linux-kernel" json:"linux-kernel" mapstructure:"linux-kernel"`
Python python `yaml:"python" json:"python" mapstructure:"python"`
Attest attest `yaml:"attest" json:"attest" mapstructure:"attest"`
FileMetadata FileMetadata `yaml:"file-metadata" json:"file-metadata" mapstructure:"file-metadata"`
FileClassification fileClassification `yaml:"file-classification" json:"file-classification" mapstructure:"file-classification"`
@ -85,6 +87,9 @@ func (cfg Application) ToCatalogerConfig() cataloger.Config {
LinuxKernel: kernel.LinuxCatalogerConfig{
CatalogModules: cfg.LinuxKernel.CatalogModules,
},
Python: pythonCataloger.CatalogerConfig{
GuessUnpinnedRequirements: cfg.Python.GuessUnpinnedRequirements,
},
}
}

13
internal/config/python.go Normal file
View file

@ -0,0 +1,13 @@
package config
import (
"github.com/spf13/viper"
)
// python holds the application-level configuration for python cataloging.
// It is populated from the "python" section of the application config
// (see the yaml/json/mapstructure tags below).
type python struct {
// GuessUnpinnedRequirements is forwarded to the python cataloger config and
// controls whether a version is guessed for requirements.txt entries that
// are not pinned to an exact version.
GuessUnpinnedRequirements bool `json:"guess-unpinned-requirements" yaml:"guess-unpinned-requirements" mapstructure:"guess-unpinned-requirements"`
}
// loadDefaultValues registers the default for the python section on the
// given viper instance: guessing of unpinned requirements is off unless the
// user explicitly enables it.
func (cfg python) loadDefaultValues(v *viper.Viper) {
	const guessUnpinnedKey = "python.guess-unpinned-requirements"
	v.SetDefault(guessUnpinnedKey, false)
}

View file

@ -1626,10 +1626,7 @@
"type": "object",
"required": [
"name",
"extras",
"versionConstraint",
"url",
"markers"
"versionConstraint"
]
},
"RDescriptionFileMetadata": {

View file

@ -46,7 +46,7 @@ func ImageCatalogers(cfg Config) []pkg.Cataloger {
binary.NewCataloger(),
deb.NewDpkgdbCataloger(),
dotnet.NewDotnetPortableExecutableCataloger(),
golang.NewGoModuleBinaryCataloger(cfg.Go()),
golang.NewGoModuleBinaryCataloger(cfg.Golang),
java.NewJavaCataloger(cfg.Java()),
java.NewNativeImageCataloger(),
javascript.NewPackageCataloger(),
@ -74,8 +74,8 @@ func DirectoryCatalogers(cfg Config) []pkg.Cataloger {
dotnet.NewDotnetPortableExecutableCataloger(),
elixir.NewMixLockCataloger(),
erlang.NewRebarLockCataloger(),
golang.NewGoModFileCataloger(cfg.Go()),
golang.NewGoModuleBinaryCataloger(cfg.Go()),
golang.NewGoModFileCataloger(cfg.Golang),
golang.NewGoModuleBinaryCataloger(cfg.Golang),
haskell.NewHackageCataloger(),
java.NewJavaCataloger(cfg.Java()),
java.NewJavaGradleLockfileCataloger(),
@ -85,7 +85,7 @@ func DirectoryCatalogers(cfg Config) []pkg.Cataloger {
nix.NewStoreCataloger(),
php.NewComposerLockCataloger(),
portage.NewPortageCataloger(),
python.NewPythonIndexCataloger(),
python.NewPythonIndexCataloger(cfg.Python),
python.NewPythonPackageCataloger(),
rpm.NewFileCataloger(),
rpm.NewRpmDBCataloger(),
@ -110,8 +110,8 @@ func AllCatalogers(cfg Config) []pkg.Cataloger {
dotnet.NewDotnetPortableExecutableCataloger(),
elixir.NewMixLockCataloger(),
erlang.NewRebarLockCataloger(),
golang.NewGoModFileCataloger(cfg.Go()),
golang.NewGoModuleBinaryCataloger(cfg.Go()),
golang.NewGoModFileCataloger(cfg.Golang),
golang.NewGoModuleBinaryCataloger(cfg.Golang),
haskell.NewHackageCataloger(),
java.NewJavaCataloger(cfg.Java()),
java.NewJavaGradleLockfileCataloger(),
@ -119,12 +119,12 @@ func AllCatalogers(cfg Config) []pkg.Cataloger {
java.NewNativeImageCataloger(),
javascript.NewLockCataloger(),
javascript.NewPackageCataloger(),
kernel.NewLinuxKernelCataloger(cfg.Kernel()),
kernel.NewLinuxKernelCataloger(cfg.LinuxKernel),
nix.NewStoreCataloger(),
php.NewComposerInstalledCataloger(),
php.NewComposerLockCataloger(),
portage.NewPortageCataloger(),
python.NewPythonIndexCataloger(),
python.NewPythonIndexCataloger(cfg.Python),
python.NewPythonPackageCataloger(),
r.NewPackageCataloger(),
rpm.NewFileCataloger(),

View file

@ -4,6 +4,7 @@ import (
"github.com/anchore/syft/syft/pkg/cataloger/golang"
"github.com/anchore/syft/syft/pkg/cataloger/java"
"github.com/anchore/syft/syft/pkg/cataloger/kernel"
"github.com/anchore/syft/syft/pkg/cataloger/python"
)
// TODO: these field naming vs helper function naming schemes are inconsistent.
@ -12,6 +13,7 @@ type Config struct {
Search SearchConfig
Golang golang.GoCatalogerOpts
LinuxKernel kernel.LinuxCatalogerConfig
Python python.CatalogerConfig
Catalogers []string
Parallelism int
}
@ -21,6 +23,7 @@ func DefaultConfig() Config {
Search: DefaultSearchConfig(),
Parallelism: 1,
LinuxKernel: kernel.DefaultLinuxCatalogerConfig(),
Python: python.DefaultCatalogerConfig(),
}
}
@ -30,11 +33,3 @@ func (c Config) Java() java.Config {
SearchIndexedArchives: c.Search.IncludeIndexedArchives,
}
}
func (c Config) Go() golang.GoCatalogerOpts {
return c.Golang
}
func (c Config) Kernel() kernel.LinuxCatalogerConfig {
return c.LinuxKernel
}

View file

@ -6,10 +6,21 @@ import (
const eggInfoGlob = "**/*.egg-info"
// CatalogerConfig carries the options accepted by the python index cataloger.
type CatalogerConfig struct {
// GuessUnpinnedRequirements, when true, lets the requirements.txt parser
// guess a concrete version for entries that only carry a version constraint
// instead of an exact pin.
GuessUnpinnedRequirements bool
}
// DefaultCatalogerConfig returns the python cataloger options used when the
// caller does not override anything: version guessing is disabled.
func DefaultCatalogerConfig() CatalogerConfig {
	var cfg CatalogerConfig
	cfg.GuessUnpinnedRequirements = false
	return cfg
}
// NewPythonIndexCataloger returns a new cataloger for python packages referenced from poetry lock files, requirements.txt files, and setup.py files.
func NewPythonIndexCataloger() *generic.Cataloger {
func NewPythonIndexCataloger(cfg CatalogerConfig) *generic.Cataloger {
rqp := newRequirementsParser(cfg)
return generic.NewCataloger("python-index-cataloger").
WithParserByGlobs(parseRequirementsTxt, "**/*requirements*.txt").
WithParserByGlobs(rqp.parseRequirementsTxt, "**/*requirements*.txt").
WithParserByGlobs(parsePoetryLock, "**/poetry.lock").
WithParserByGlobs(parsePipfileLock, "**/Pipfile.lock").
WithParserByGlobs(parseSetup, "**/setup.py")

View file

@ -263,7 +263,7 @@ func Test_IndexCataloger_Globs(t *testing.T) {
pkgtest.NewCatalogTester().
FromDirectory(t, test.fixture).
ExpectsResolverContentQueries(test.expected).
TestCataloger(t, NewPythonIndexCataloger())
TestCataloger(t, NewPythonIndexCataloger(DefaultCatalogerConfig()))
})
}
}

View file

@ -7,6 +7,10 @@ import (
"strings"
"unicode"
pep440 "github.com/aquasecurity/go-pep440-version"
"github.com/mitchellh/mapstructure"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
@ -14,23 +18,99 @@ import (
"github.com/anchore/syft/syft/pkg/cataloger/generic"
)
var _ generic.Parser = parseRequirementsTxt
const (
// given the example requirement:
// requests[security] == 2.8.* ; python_version < "2.7" and sys_platform == "linux" \
// --hash=sha256:a9b3aaa1904eeb78e32394cd46c6f37ac0fb4af6dc488daa58971bdc7d7fcaf3 \
// --hash=sha256:e9535b8c84dc9571a48999094fda7f33e63c3f1b74f3e5f3ac0105a58405bb65 # some comment
var (
extrasRegex = regexp.MustCompile(`\[.*\]`)
urlRegex = regexp.MustCompile("@.*git.*")
// namePattern matches: requests[security]
namePattern = `(?P<name>\w[\w\[\],\s-_]+)`
// versionConstraintPattern matches: == 2.8.*
versionConstraintPattern = `(?P<versionConstraint>([^\S\r\n]*[~=>!<]+\s*[0-9a-zA-Z.*]+[^\S\r\n]*,?)+)?(@[^\S\r\n]*(?P<url>[^;]*))?`
// markersPattern matches: python_version < "2.7" and sys_platform == "linux"
markersPattern = `(;(?P<markers>.*))?`
// hashesPattern matches: --hash=sha256:a9b3aaa1904eeb78e32394cd46c6f37ac0fb4af6dc488daa58971bdc7d7fcaf3 --hash=sha256:e9535b8c84dc9571a48999094fda7f33e63c3f1b74f3e5f3ac0105a58405bb65
hashesPattern = `(?P<hashes>([^\S\r\n]*--hash=[a-zA-Z0-9:]+)+)?`
// whiteSpaceNoNewlinePattern matches: (any whitespace character except for \r and \n)
whiteSpaceNoNewlinePattern = `[^\S\r\n]*`
)
// requirementPattern matches a single (already de-commented, continuation-joined)
// requirements.txt line. It is anchored at the start of the input only and is
// built by concatenating, in order: optional leading whitespace, the package
// name (including any extras), optional whitespace, the optional version
// constraint and/or "@ url" part, the optional ";"-prefixed markers, and the
// optional "--hash=" entries. The named capture groups (name, versionConstraint,
// url, markers, hashes) line up with the mapstructure tags on unprocessedRequirement.
var requirementPattern = regexp.MustCompile(
`^` +
whiteSpaceNoNewlinePattern +
namePattern +
whiteSpaceNoNewlinePattern +
versionConstraintPattern +
markersPattern +
hashesPattern,
)
// unprocessedRequirement holds the raw pieces of one requirements.txt entry as
// captured by requirementPattern. Each field is filled from the named capture
// group given in its mapstructure tag; values are only whitespace-trimmed, not
// otherwise interpreted.
type unprocessedRequirement struct {
// Name is the package name, still including any "[extras]" suffix.
Name string `mapstructure:"name"`
// VersionConstraint is the version specifier text, e.g. "== 2.8.*" or ">= 1.0, < 2.0".
VersionConstraint string `mapstructure:"versionConstraint"`
// Markers is the environment marker expression that followed ";", if any.
Markers string `mapstructure:"markers"`
// URL is the text that followed "@", if any.
URL string `mapstructure:"url"`
// Hashes is the run of "--hash=..." options found on the line, if any.
Hashes string `mapstructure:"hashes"`
}
// newRequirement splits a single requirements.txt line into its raw parts
// (name, version constraint, markers, URL, hashes) using requirementPattern.
// Every captured value is whitespace-trimmed. It returns nil when the captured
// groups cannot be decoded or when no package name was captured.
func newRequirement(raw string) *unprocessedRequirement {
	captured := internal.MatchNamedCaptureGroups(requirementPattern, raw)

	req := new(unprocessedRequirement)
	if err := mapstructure.Decode(captured, req); err != nil {
		return nil
	}

	// normalize every field in one pass rather than field-by-field
	for _, field := range []*string{
		&req.Name,
		&req.VersionConstraint,
		&req.Markers,
		&req.URL,
		&req.Hashes,
	} {
		*field = strings.TrimSpace(*field)
	}

	// a requirement without a name is not usable by callers
	if req.Name == "" {
		return nil
	}
	return req
}
// requirementsParser is the requirements.txt parser together with the options
// that influence how entries are turned into packages.
type requirementsParser struct {
// guessUnpinnedRequirements is copied from CatalogerConfig.GuessUnpinnedRequirements
// and is passed to parseVersion to decide whether unpinned constraints are guessed.
guessUnpinnedRequirements bool
}
// newRequirementsParser builds a requirementsParser from the cataloger
// configuration, carrying over the unpinned-version guessing option.
func newRequirementsParser(cfg CatalogerConfig) requirementsParser {
	var parser requirementsParser
	parser.guessUnpinnedRequirements = cfg.GuessUnpinnedRequirements
	return parser
}
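// parseRequirementsTxt takes a Python requirements.txt file, returning all Python packages that are locked to a
// specific version and, when guessing of unpinned requirements is enabled, packages whose version could be guessed
// from their version constraint.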
func parseRequirementsTxt(_ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
func (rp requirementsParser) parseRequirementsTxt(_ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
var packages []pkg.Package
scanner := bufio.NewScanner(reader)
var lastLine string
for scanner.Scan() {
line := scanner.Text()
rawLineNoComments := removeTrailingComment(line)
line = trimRequirementsTxtLine(line)
line := trimRequirementsTxtLine(scanner.Text())
if lastLine != "" {
line = lastLine + line
lastLine = ""
}
// remove line continuations... smashes the file into a single line
if strings.HasSuffix(line, "\\") {
// this line is a continuation of the previous line
lastLine += strings.TrimSuffix(line, "\\")
continue
}
if line == "" {
// nothing to parse on this line
@ -42,35 +122,20 @@ func parseRequirementsTxt(_ file.Resolver, _ *generic.Environment, reader file.L
continue
}
if !strings.Contains(line, "==") {
// a package without a version, or a range (unpinned) which does not tell us
// exactly what will be installed.
continue
}
// parse a new requirement
parts := strings.Split(line, "==")
if len(parts) < 2 {
// this should never happen, but just in case
req := newRequirement(line)
if req == nil {
log.WithFields("path", reader.RealPath).Warnf("unable to parse requirements.txt line: %q", line)
continue
}
// check if the version contains hash declarations on the same line
version, _ := parseVersionAndHashes(parts[1])
name := removeExtras(req.Name)
version := parseVersion(req.VersionConstraint, rp.guessUnpinnedRequirements)
name := strings.TrimSpace(parts[0])
version = strings.TrimFunc(version, func(r rune) bool {
return !unicode.IsLetter(r) && !unicode.IsNumber(r)
})
// TODO: Update to support more than only ==
versionConstraint := fmt.Sprintf("== %s", version)
if name == "" || version == "" {
log.WithFields("path", reader.RealPath).Debugf("found empty package in requirements.txt line: %q", line)
if version == "" {
log.WithFields("path", reader.RealPath).Tracef("unable to determine package version in requirements.txt line: %q", line)
continue
}
packages = append(
packages,
newPackageForRequirementsWithMetadata(
@ -78,10 +143,10 @@ func parseRequirementsTxt(_ file.Resolver, _ *generic.Environment, reader file.L
version,
pkg.PythonRequirementsMetadata{
Name: name,
Extras: parseExtras(rawLineNoComments),
VersionConstraint: versionConstraint,
URL: parseURL(rawLineNoComments),
Markers: parseMarkers(rawLineNoComments),
Extras: parseExtras(req.Name),
VersionConstraint: req.VersionConstraint,
URL: parseURL(req.URL),
Markers: req.Markers,
},
reader.Location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
),
@ -95,13 +160,68 @@ func parseRequirementsTxt(_ file.Resolver, _ *generic.Environment, reader file.L
return packages, nil, nil
}
func parseVersionAndHashes(version string) (string, []string) {
parts := strings.Split(version, "--hash=")
if len(parts) < 2 {
return version, nil
func parseVersion(version string, guessFromConstraint bool) string {
if isPinnedConstraint(version) {
return strings.TrimSpace(strings.ReplaceAll(version, "==", ""))
}
return parts[0], parts[1:]
if guessFromConstraint {
return guessVersion(version)
}
return ""
}
// isPinnedConstraint reports whether the given version constraint pins exactly
// one version: it must contain "==" and must not contain a wildcard ("*"), a
// clause separator (","), or any of the range/exclusion operator characters
// ("<", ">", "!").
func isPinnedConstraint(version string) bool {
	if !strings.Contains(version, "==") {
		return false
	}
	// any of these characters means the constraint is compound, ranged, or wildcarded
	return strings.IndexAny(version, "*,<>!") < 0
}
// guessVersion derives a single concrete version from an unpinned PEP 440
// constraint such as ">= 1.0.0, <= 2.0.0, != 3.0.0".
//
// Resolution rules, in order:
//   - wildcards are replaced with "0" ("2.8.*" becomes "2.8.0"); if that leaves
//     a lone "==" clause its version text is returned
//   - versions named by "!=" clauses are excluded from consideration
//   - the first remaining clause using "==" wins and its version text is returned
//   - otherwise the greatest version among the remaining clauses whose operator
//     contains '=' (">=", "<=", "~=") is returned in normalized form
//     (e.g. "1.3a" is reported as "1.3a0")
//
// Clauses using strict operators ("<", ">") and clauses whose version cannot
// be parsed are ignored. An empty string is returned when no candidate remains.
//
// NOTE(review): the user-facing configuration docs describe the guess as the
// "lowest expressible version", but the selection below keeps the greatest
// candidate. Existing tests pin the current behavior; confirm which is intended
// before changing it.
func guessVersion(constraint string) string {
	// handle "2.8.*" -> "2.8.0"
	constraint = strings.ReplaceAll(constraint, "*", "0")
	if isPinnedConstraint(constraint) {
		return strings.TrimSpace(strings.ReplaceAll(constraint, "==", ""))
	}

	clauses := strings.Split(constraint, ",")

	// collect the versions ruled out by "!=" clauses. Keys are stored in
	// normalized form so that they compare equal to version.String() below
	// (e.g. "!=1.3a" must exclude "1.3a0").
	excluded := make(map[string]struct{})
	for _, clause := range clauses {
		idx := strings.Index(clause, "!=")
		if idx == -1 {
			continue
		}
		key := strings.TrimSpace(clause[idx+len("!="):])
		if parsed, err := pep440.Parse(key); err == nil {
			key = parsed.String()
		}
		excluded[key] = struct{}{}
	}

	var closestVersion *pep440.Version
	for _, clause := range clauses {
		// the version text is whatever follows the operator; every operator that
		// admits the named version itself ends in '='. Clauses without '='
		// (strict "<" / ">") name a version that is not allowed, so skip them.
		idx := strings.LastIndex(clause, "=")
		if idx == -1 {
			continue
		}

		text := strings.TrimSpace(clause[idx+1:])
		version, err := pep440.Parse(text)
		if err != nil {
			// ignore any clause that does not carry a valid PEP 440 version
			continue
		}

		if _, skip := excluded[version.String()]; skip {
			continue
		}

		// an exact match clause is authoritative, regardless of other bounds
		if strings.Contains(clause, "==") {
			return text
		}

		if closestVersion == nil || version.GreaterThan(*closestVersion) {
			closestVersion = &version
		}
	}

	if closestVersion == nil {
		return ""
	}
	return closestVersion.String()
}
// trimRequirementsTxtLine removes content from the given requirements.txt line
@ -109,8 +229,6 @@ func parseVersionAndHashes(version string) (string, []string) {
func trimRequirementsTxtLine(line string) string {
line = strings.TrimSpace(line)
line = removeTrailingComment(line)
line = removeEnvironmentMarkers(line)
line = checkForRegex(line) // remove extras and url from line if found
return line
}
@ -127,44 +245,29 @@ func removeTrailingComment(line string) string {
return parts[0]
}
// removeEnvironmentMarkers removes any instances of environment markers (delimited by ';') from the line.
// For more information, see https://www.python.org/dev/peps/pep-0508/#environment-markers.
func removeEnvironmentMarkers(line string) string {
parts := strings.SplitN(line, ";", 2)
if len(parts) < 2 {
// there aren't any environment markers
return line
func removeExtras(packageName string) string {
start := strings.Index(packageName, "[")
if start == -1 {
return packageName
}
return parts[0]
return strings.TrimSpace(packageName[:start])
}
func parseExtras(packageName string) []string {
if extrasRegex.MatchString(packageName) {
// Remove square brackets
extras := strings.TrimFunc(extrasRegex.FindString(packageName), func(r rune) bool {
return !unicode.IsLetter(r) && !unicode.IsNumber(r)
})
var extras []string
// Remove any additional whitespace
extras = strings.ReplaceAll(extras, " ", "")
return strings.Split(extras, ",")
start := strings.Index(packageName, "[")
stop := strings.Index(packageName, "]")
if start == -1 || stop == -1 {
return extras
}
return []string{}
}
func parseMarkers(line string) string {
var markers string
parts := strings.SplitN(line, ";", 2)
if len(parts) == 2 {
markers = strings.TrimSpace(parts[1])
extraString := packageName[start+1 : stop]
for _, extra := range strings.Split(extraString, ",") {
extras = append(extras, strings.TrimSpace(extra))
}
return markers
return extras
}
func parseURL(line string) string {
@ -191,19 +294,3 @@ func parseURL(line string) string {
return ""
}
// function to check a string for all possilbe regex expressions, replacing it if found
func checkForRegex(stringToCheck string) string {
stringToReturn := stringToCheck
for _, r := range []*regexp.Regexp{
urlRegex,
extrasRegex,
} {
if r.MatchString(stringToCheck) {
stringToReturn = r.ReplaceAllString(stringToCheck, "")
}
}
return stringToReturn
}

View file

@ -3,6 +3,8 @@ package python
import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
@ -12,7 +14,8 @@ import (
func TestParseRequirementsTxt(t *testing.T) {
fixture := "test-fixtures/requires/requirements.txt"
locations := file.NewLocationSet(file.NewLocation(fixture))
expectedPkgs := []pkg.Package{
pinnedPkgs := []pkg.Package{
{
Name: "flask",
Version: "4.0.0",
@ -23,9 +26,7 @@ func TestParseRequirementsTxt(t *testing.T) {
MetadataType: pkg.PythonRequirementsMetadataType,
Metadata: pkg.PythonRequirementsMetadata{
Name: "flask",
Extras: []string{},
VersionConstraint: "== 4.0.0",
URL: "",
},
},
{
@ -38,9 +39,7 @@ func TestParseRequirementsTxt(t *testing.T) {
MetadataType: pkg.PythonRequirementsMetadataType,
Metadata: pkg.PythonRequirementsMetadata{
Name: "foo",
Extras: []string{},
VersionConstraint: "== 1.0.0",
URL: "",
},
},
{
@ -53,9 +52,7 @@ func TestParseRequirementsTxt(t *testing.T) {
MetadataType: pkg.PythonRequirementsMetadataType,
Metadata: pkg.PythonRequirementsMetadata{
Name: "SomeProject",
Extras: []string{},
VersionConstraint: "== 5.4",
URL: "",
VersionConstraint: "==5.4",
Markers: "python_version < '3.8'",
},
},
@ -69,9 +66,7 @@ func TestParseRequirementsTxt(t *testing.T) {
MetadataType: pkg.PythonRequirementsMetadataType,
Metadata: pkg.PythonRequirementsMetadata{
Name: "argh",
Extras: []string{},
VersionConstraint: "== 0.26.2",
URL: "",
VersionConstraint: "==0.26.2",
},
},
{
@ -84,9 +79,7 @@ func TestParseRequirementsTxt(t *testing.T) {
MetadataType: pkg.PythonRequirementsMetadataType,
Metadata: pkg.PythonRequirementsMetadata{
Name: "argh",
Extras: []string{},
VersionConstraint: "== 0.26.3",
URL: "",
VersionConstraint: "==0.26.3",
},
},
{
@ -101,23 +94,6 @@ func TestParseRequirementsTxt(t *testing.T) {
Name: "celery",
Extras: []string{"redis", "pytest"},
VersionConstraint: "== 4.4.7",
URL: "",
},
},
{
Name: "requests",
Version: "2.8",
PURL: "pkg:pypi/requests@2.8",
Locations: locations,
Language: pkg.Python,
Type: pkg.PythonPkg,
MetadataType: pkg.PythonRequirementsMetadataType,
Metadata: pkg.PythonRequirementsMetadata{
Name: "requests",
Extras: []string{"security"},
VersionConstraint: "== 2.8",
URL: "",
Markers: `python_version < "2.7" and sys_platform == "linux"`,
},
},
{
@ -130,14 +106,238 @@ func TestParseRequirementsTxt(t *testing.T) {
MetadataType: pkg.PythonRequirementsMetadataType,
Metadata: pkg.PythonRequirementsMetadata{
Name: "GithubSampleProject",
Extras: []string{},
VersionConstraint: "== 3.7.1",
URL: "git+https://github.com/owner/repo@releases/tag/v3.7.1",
},
},
}
var expectedRelationships []artifact.Relationship
var testCases = []struct {
name string
fixture string
cfg CatalogerConfig
expectedPkgs []pkg.Package
expectedRelationships []artifact.Relationship
}{
{
name: "pinned dependencies only",
fixture: fixture,
cfg: CatalogerConfig{
GuessUnpinnedRequirements: false,
},
expectedPkgs: pinnedPkgs,
},
{
name: "guess unpinned requirements (lowest version)",
fixture: fixture,
cfg: CatalogerConfig{
GuessUnpinnedRequirements: true,
},
expectedPkgs: append([]pkg.Package{
{
Name: "Mopidy-Dirble",
Version: "1.1",
PURL: "pkg:pypi/Mopidy-Dirble@1.1",
Locations: locations,
Language: pkg.Python,
Type: pkg.PythonPkg,
MetadataType: pkg.PythonRequirementsMetadataType,
Metadata: pkg.PythonRequirementsMetadata{
Name: "Mopidy-Dirble",
VersionConstraint: "~= 1.1",
},
},
{
Name: "sqlalchemy",
Version: "2.0.0",
PURL: "pkg:pypi/sqlalchemy@2.0.0",
Locations: locations,
Language: pkg.Python,
Type: pkg.PythonPkg,
MetadataType: pkg.PythonRequirementsMetadataType,
Metadata: pkg.PythonRequirementsMetadata{
Name: "sqlalchemy",
VersionConstraint: ">= 1.0.0, <= 2.0.0, != 3.0.0, <= 3.0.0",
},
},
{
Name: "bar",
Version: "2.0.0",
PURL: "pkg:pypi/bar@2.0.0",
Locations: locations,
Language: pkg.Python,
Type: pkg.PythonPkg,
MetadataType: pkg.PythonRequirementsMetadataType,
Metadata: pkg.PythonRequirementsMetadata{
Name: "bar",
VersionConstraint: ">= 1.0.0, <= 2.0.0, != 3.0.0, <= 3.0.0",
},
},
{
Name: "numpy",
Version: "3.4.1",
PURL: "pkg:pypi/numpy@3.4.1",
Locations: locations,
Language: pkg.Python,
Type: pkg.PythonPkg,
MetadataType: pkg.PythonRequirementsMetadataType,
Metadata: pkg.PythonRequirementsMetadata{
Name: "numpy",
VersionConstraint: ">= 3.4.1",
Markers: `sys_platform == 'win32'`,
},
},
{
Name: "requests",
Version: "2.8.0",
PURL: "pkg:pypi/requests@2.8.0",
Locations: locations,
Language: pkg.Python,
Type: pkg.PythonPkg,
MetadataType: pkg.PythonRequirementsMetadataType,
Metadata: pkg.PythonRequirementsMetadata{
Name: "requests",
Extras: []string{"security"},
VersionConstraint: "== 2.8.*",
Markers: `python_version < "2.7" and sys_platform == "linux"`,
},
},
}, pinnedPkgs...),
},
}
pkgtest.TestFileParser(t, fixture, parseRequirementsTxt, expectedPkgs, expectedRelationships)
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
parser := newRequirementsParser(tc.cfg)
pkgtest.TestFileParser(t, tc.fixture, parser.parseRequirementsTxt, tc.expectedPkgs, tc.expectedRelationships)
})
}
}
// Test_newRequirement is a table-driven test for newRequirement: each case
// feeds one raw requirements.txt line and compares the returned
// *unprocessedRequirement against the expected raw (trimmed, otherwise
// uninterpreted) fields.
func Test_newRequirement(t *testing.T) {
tests := []struct {
name string
raw string
want *unprocessedRequirement
}{
{
name: "simple",
raw: "requests==2.8",
want: &unprocessedRequirement{
Name: "requests",
VersionConstraint: "==2.8",
},
},
{
// the trailing "# ..." text is expected to be absent from every field
name: "comment + constraint",
raw: "Mopidy-Dirble ~= 1.1 # Compatible release. Same as >= 1.1, == 1.*",
want: &unprocessedRequirement{
Name: "Mopidy-Dirble",
VersionConstraint: "~= 1.1",
},
},
{
name: "hashes",
raw: "argh==0.26.3 --hash=sha256:a9b3aaa1904eeb78e32394cd46c6f37ac0fb4af6dc488daa58971bdc7d7fcaf3 --hash=sha256:e9535b8c84dc9571a48999094fda7f33e63c3f1b74f3e5f3ac0105a58405bb65",
want: &unprocessedRequirement{
Name: "argh",
VersionConstraint: "==0.26.3",
Hashes: "--hash=sha256:a9b3aaa1904eeb78e32394cd46c6f37ac0fb4af6dc488daa58971bdc7d7fcaf3 --hash=sha256:e9535b8c84dc9571a48999094fda7f33e63c3f1b74f3e5f3ac0105a58405bb65",
},
},
{
// extras stay attached to the name at this stage
name: "extras",
raw: "celery[redis, pytest] == 4.4.7 # should remove [redis, pytest]",
want: &unprocessedRequirement{
Name: "celery[redis, pytest]",
VersionConstraint: "== 4.4.7",
},
},
{
name: "url",
raw: "GithubSampleProject == 3.7.1 @ git+https://github.com/owner/repo@releases/tag/v3.7.1",
want: &unprocessedRequirement{
Name: "GithubSampleProject",
VersionConstraint: "== 3.7.1",
URL: "git+https://github.com/owner/repo@releases/tag/v3.7.1",
},
},
{
name: "markers",
raw: "numpy >= 3.4.1 ; sys_platform == 'win32'",
want: &unprocessedRequirement{
Name: "numpy",
VersionConstraint: ">= 3.4.1",
Markers: "sys_platform == 'win32'",
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
assert.Equal(t, tt.want, newRequirement(tt.raw))
})
}
}
// Test_parseVersion is a table-driven test for parseVersion: each case passes a
// version constraint string plus the guess flag and asserts on the returned
// version string (empty when no version is determined).
//
// checkout https://www.darius.page/pipdev/ for help here! (github.com/nok/pipdev)
func Test_parseVersion(t *testing.T) {
tests := []struct {
name string
version string
guess bool
want string
}{
{
name: "exact",
version: "1.0.0",
want: "", // we can only parse constraints, not assume that a single version is a pin
},
{
name: "exact constraint",
version: " == 1.0.0 ",
want: "1.0.0",
},
{
name: "resolve lowest, simple constraint",
version: " >= 1.0.0 ",
guess: true,
want: "1.0.0",
},
{
name: "resolve lowest, compound constraint",
version: " < 2.0.0, >= 1.0.0, != 1.1.0 ",
guess: true,
want: "1.0.0",
},
{
name: "resolve lowest, handle asterisk",
version: "==2.8.*",
guess: true,
want: "2.8.0",
},
{
name: "resolve lowest, handle exceptions",
version: " !=4.0.2,!=4.1.0,!=4.2.0,>=4.0.1,!=4.3.0,!=5.0.0,!=5.1.0,<6.0.0",
guess: true,
want: "4.0.1",
},
{
name: "resolve lowest, compatible version constraint",
version: "~=0.6.10", // equates to >=0.6.10, ==0.6.*
guess: true,
want: "0.6.10",
},
{
name: "resolve lowest, with character in version",
version: "~=1.2b,<=1.3a,!=1.1,!=1.2",
guess: true,
want: "1.3a0", // note: 1.3a == 1.3a0
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
assert.Equal(t, tt.want, parseVersion(tt.version, tt.guess))
})
}
}

View file

@ -1,7 +1,9 @@
flask == 4.0.0
# a line that is ignored
sqlalchemy >= 1.0.0
sqlalchemy >= 1.0.0, <= 2.0.0, != 3.0.0, <= 3.0.0
foo == 1.0.0 # a comment that needs to be ignored
bar >= 1.0.0, <= 2.0.0, \
!= 3.0.0, <= 3.0.0
-e https://github.com/pecan/pecan.git
-r other-requirements.txt
--requirements super-secretrequirements.txt

View file

@ -2,8 +2,8 @@ package pkg
type PythonRequirementsMetadata struct {
Name string `json:"name" mapstruct:"Name"`
Extras []string `json:"extras" mapstruct:"Extras"`
Extras []string `json:"extras,omitempty" mapstruct:"Extras"`
VersionConstraint string `json:"versionConstraint" mapstruct:"VersionConstraint"`
URL string `json:"url" mapstruct:"URL"`
Markers string `json:"markers" mapstruct:"Markers"`
URL string `json:"url,omitempty" mapstruct:"URL"`
Markers string `json:"markers,omitempty" mapstruct:"Markers"`
}