Create python requirements metadata (#1759)

- Create new metadata struct and type for python requirements.
- Update parsing of python requirements to use python requirements metadata.
- Remove extras and url from line. Add them to metadata instead.
- Add unit test to test that extras are removed from package name.
- Update test to look at requirements metadata.
- Will need to be updated in the future to support more than just == for the version constraint.
- Update JSON schema data

Closes anchore/grype#1246
Closes anchore/grype#1251

Signed-off-by: Shane Dell <shanedell100@gmail.com>
This commit is contained in:
Shane Dell 2023-04-27 09:04:30 -04:00 committed by GitHub
parent 451cb9d5ca
commit a07bfe7dfa
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 2102 additions and 81 deletions

View file

@ -6,5 +6,5 @@ const (
// JSONSchemaVersion is the current schema version output by the JSON encoder
// This is roughly following the "SchemaVer" guidelines for versioning the JSON schema. Please see schema/json/README.md for details on how to increment.
JSONSchemaVersion = "7.1.4"
JSONSchemaVersion = "7.1.5"
)

View file

@ -55,6 +55,7 @@ type artifactMetadataContainer struct {
Portage pkg.PortageMetadata
PythonPackage pkg.PythonPackageMetadata
PythonPipfilelock pkg.PythonPipfileLockMetadata
PythonRequirements pkg.PythonRequirementsMetadata
Rebar pkg.RebarLockMetadata
Rpm pkg.RpmMetadata
RustCargo pkg.CargoPackageMetadata

File diff suppressed because it is too large Load diff

View file

@ -87,9 +87,5 @@
"configuration": {
"config-key": "config-value"
}
},
"schema": {
"version": "7.1.4",
"url": "https://raw.githubusercontent.com/anchore/syft/main/schema/json/schema-7.1.4.json"
}
}

View file

@ -187,9 +187,5 @@
"configuration": {
"config-key": "config-value"
}
},
"schema": {
"version": "7.1.4",
"url": "https://raw.githubusercontent.com/anchore/syft/main/schema/json/schema-7.1.4.json"
}
}

View file

@ -9,7 +9,7 @@
"locations": [
{
"path": "/somefile-1.txt",
"layerID": "sha256:fb6beecb75b39f4bb813dbf177e501edd5ddb3e69bb45cedeb78c676ee1b7a59"
"layerID": "sha256:7e139310bd6ce0956d65a70d26a6d31b240a4f47094a831638f05d381b6c424a"
}
],
"licenses": [
@ -40,7 +40,7 @@
"locations": [
{
"path": "/somefile-2.txt",
"layerID": "sha256:319b588ce64253a87b533c8ed01cf0025e0eac98e7b516e12532957e1244fdec"
"layerID": "sha256:cc833bf31a480c064d65ca67ee37f77f0d0c8ab98eedde7b286ad1ef6f5bdcac"
}
],
"licenses": [],
@ -64,11 +64,11 @@
],
"artifactRelationships": [],
"source": {
"id": "1a678f111c8ddc66fd82687bb024e0dd6af61314404937a80e810c0cf317b796",
"id": "0af8fa79f5497297e4e32f3e03de14ac20ad695159df0ac8373e6543614b9a50",
"type": "image",
"target": {
"userInput": "user-image-input",
"imageID": "sha256:3c51b06feb0cda8ee62d0e3755ef2a8496a6b71f8a55b245f07f31c4bb813d31",
"imageID": "sha256:0cb4395791986bda17562bd6f76811bb6f163f686e198397197ef8241bed58df",
"manifestDigest": "sha256:2731251dc34951c0e50fcc643b4c5f74922dad1a5d98f302b504cf46cd5d9368",
"mediaType": "application/vnd.docker.distribution.manifest.v2+json",
"tags": [
@ -78,17 +78,17 @@
"layers": [
{
"mediaType": "application/vnd.docker.image.rootfs.diff.tar.gzip",
"digest": "sha256:fb6beecb75b39f4bb813dbf177e501edd5ddb3e69bb45cedeb78c676ee1b7a59",
"digest": "sha256:7e139310bd6ce0956d65a70d26a6d31b240a4f47094a831638f05d381b6c424a",
"size": 22
},
{
"mediaType": "application/vnd.docker.image.rootfs.diff.tar.gzip",
"digest": "sha256:319b588ce64253a87b533c8ed01cf0025e0eac98e7b516e12532957e1244fdec",
"digest": "sha256:cc833bf31a480c064d65ca67ee37f77f0d0c8ab98eedde7b286ad1ef6f5bdcac",
"size": 16
}
],
"manifest": "eyJzY2hlbWFWZXJzaW9uIjoyLCJtZWRpYVR5cGUiOiJhcHBsaWNhdGlvbi92bmQuZG9ja2VyLmRpc3RyaWJ1dGlvbi5tYW5pZmVzdC52Mitqc29uIiwiY29uZmlnIjp7Im1lZGlhVHlwZSI6ImFwcGxpY2F0aW9uL3ZuZC5kb2NrZXIuY29udGFpbmVyLmltYWdlLnYxK2pzb24iLCJzaXplIjo2NzMsImRpZ2VzdCI6InNoYTI1NjozYzUxYjA2ZmViMGNkYThlZTYyZDBlMzc1NWVmMmE4NDk2YTZiNzFmOGE1NWIyNDVmMDdmMzFjNGJiODEzZDMxIn0sImxheWVycyI6W3sibWVkaWFUeXBlIjoiYXBwbGljYXRpb24vdm5kLmRvY2tlci5pbWFnZS5yb290ZnMuZGlmZi50YXIuZ3ppcCIsInNpemUiOjIwNDgsImRpZ2VzdCI6InNoYTI1NjpmYjZiZWVjYjc1YjM5ZjRiYjgxM2RiZjE3N2U1MDFlZGQ1ZGRiM2U2OWJiNDVjZWRlYjc4YzY3NmVlMWI3YTU5In0seyJtZWRpYVR5cGUiOiJhcHBsaWNhdGlvbi92bmQuZG9ja2VyLmltYWdlLnJvb3Rmcy5kaWZmLnRhci5nemlwIiwic2l6ZSI6MjA0OCwiZGlnZXN0Ijoic2hhMjU2OjMxOWI1ODhjZTY0MjUzYTg3YjUzM2M4ZWQwMWNmMDAyNWUwZWFjOThlN2I1MTZlMTI1MzI5NTdlMTI0NGZkZWMifV19",
"config": "eyJhcmNoaXRlY3R1cmUiOiJhbWQ2NCIsImNvbmZpZyI6eyJFbnYiOlsiUEFUSD0vdXNyL2xvY2FsL3NiaW46L3Vzci9sb2NhbC9iaW46L3Vzci9zYmluOi91c3IvYmluOi9zYmluOi9iaW4iXSwiV29ya2luZ0RpciI6Ii8iLCJPbkJ1aWxkIjpudWxsfSwiY3JlYXRlZCI6IjIwMjItMDgtMDFUMjA6MDk6MjIuNTA5NDIxNzEyWiIsImhpc3RvcnkiOlt7ImNyZWF0ZWQiOiIyMDIyLTA4LTAxVDIwOjA5OjIyLjQ4Nzg5NTUxOVoiLCJjcmVhdGVkX2J5IjoiQUREIGZpbGUtMS50eHQgL3NvbWVmaWxlLTEudHh0ICMgYnVpbGRraXQiLCJjb21tZW50IjoiYnVpbGRraXQuZG9ja2VyZmlsZS52MCJ9LHsiY3JlYXRlZCI6IjIwMjItMDgtMDFUMjA6MDk6MjIuNTA5NDIxNzEyWiIsImNyZWF0ZWRfYnkiOiJBREQgZmlsZS0yLnR4dCAvc29tZWZpbGUtMi50eHQgIyBidWlsZGtpdCIsImNvbW1lbnQiOiJidWlsZGtpdC5kb2NrZXJmaWxlLnYwIn1dLCJvcyI6ImxpbnV4Iiwicm9vdGZzIjp7InR5cGUiOiJsYXllcnMiLCJkaWZmX2lkcyI6WyJzaGEyNTY6ZmI2YmVlY2I3NWIzOWY0YmI4MTNkYmYxNzdlNTAxZWRkNWRkYjNlNjliYjQ1Y2VkZWI3OGM2NzZlZTFiN2E1OSIsInNoYTI1NjozMTliNTg4Y2U2NDI1M2E4N2I1MzNjOGVkMDFjZjAwMjVlMGVhYzk4ZTdiNTE2ZTEyNTMyOTU3ZTEyNDRmZGVjIl19fQ==",
"manifest": "eyJzY2hlbWFWZXJzaW9uIjoyLCJtZWRpYVR5cGUiOiJhcHBsaWNhdGlvbi92bmQuZG9ja2VyLmRpc3RyaWJ1dGlvbi5tYW5pZmVzdC52Mitqc29uIiwiY29uZmlnIjp7Im1lZGlhVHlwZSI6ImFwcGxpY2F0aW9uL3ZuZC5kb2NrZXIuY29udGFpbmVyLmltYWdlLnYxK2pzb24iLCJzaXplIjo2NzEsImRpZ2VzdCI6InNoYTI1NjowY2I0Mzk1NzkxOTg2YmRhMTc1NjJiZDZmNzY4MTFiYjZmMTYzZjY4NmUxOTgzOTcxOTdlZjgyNDFiZWQ1OGRmIn0sImxheWVycyI6W3sibWVkaWFUeXBlIjoiYXBwbGljYXRpb24vdm5kLmRvY2tlci5pbWFnZS5yb290ZnMuZGlmZi50YXIuZ3ppcCIsInNpemUiOjIwNDgsImRpZ2VzdCI6InNoYTI1Njo3ZTEzOTMxMGJkNmNlMDk1NmQ2NWE3MGQyNmE2ZDMxYjI0MGE0ZjQ3MDk0YTgzMTYzOGYwNWQzODFiNmM0MjRhIn0seyJtZWRpYVR5cGUiOiJhcHBsaWNhdGlvbi92bmQuZG9ja2VyLmltYWdlLnJvb3Rmcy5kaWZmLnRhci5nemlwIiwic2l6ZSI6MjA0OCwiZGlnZXN0Ijoic2hhMjU2OmNjODMzYmYzMWE0ODBjMDY0ZDY1Y2E2N2VlMzdmNzdmMGQwYzhhYjk4ZWVkZGU3YjI4NmFkMWVmNmY1YmRjYWMifV19",
"config": "eyJhcmNoaXRlY3R1cmUiOiJhcm02NCIsImNvbmZpZyI6eyJFbnYiOlsiUEFUSD0vdXNyL2xvY2FsL3NiaW46L3Vzci9sb2NhbC9iaW46L3Vzci9zYmluOi91c3IvYmluOi9zYmluOi9iaW4iXSwiV29ya2luZ0RpciI6Ii8iLCJPbkJ1aWxkIjpudWxsfSwiY3JlYXRlZCI6IjIwMjMtMDQtMThUMTQ6MDk6NDIuMzAxMDI2MzhaIiwiaGlzdG9yeSI6W3siY3JlYXRlZCI6IjIwMjMtMDQtMThUMTQ6MDk6NDIuMjg3OTQyNzEzWiIsImNyZWF0ZWRfYnkiOiJBREQgZmlsZS0xLnR4dCAvc29tZWZpbGUtMS50eHQgIyBidWlsZGtpdCIsImNvbW1lbnQiOiJidWlsZGtpdC5kb2NrZXJmaWxlLnYwIn0seyJjcmVhdGVkIjoiMjAyMy0wNC0xOFQxNDowOTo0Mi4zMDEwMjYzOFoiLCJjcmVhdGVkX2J5IjoiQUREIGZpbGUtMi50eHQgL3NvbWVmaWxlLTIudHh0ICMgYnVpbGRraXQiLCJjb21tZW50IjoiYnVpbGRraXQuZG9ja2VyZmlsZS52MCJ9XSwib3MiOiJsaW51eCIsInJvb3RmcyI6eyJ0eXBlIjoibGF5ZXJzIiwiZGlmZl9pZHMiOlsic2hhMjU2OjdlMTM5MzEwYmQ2Y2UwOTU2ZDY1YTcwZDI2YTZkMzFiMjQwYTRmNDcwOTRhODMxNjM4ZjA1ZDM4MWI2YzQyNGEiLCJzaGEyNTY6Y2M4MzNiZjMxYTQ4MGMwNjRkNjVjYTY3ZWUzN2Y3N2YwZDBjOGFiOThlZWRkZTdiMjg2YWQxZWY2ZjViZGNhYyJdfX0=",
"repoDigests": [],
"architecture": "",
"os": ""
@ -110,9 +110,5 @@
"configuration": {
"config-key": "config-value"
}
},
"schema": {
"version": "7.1.4",
"url": "https://raw.githubusercontent.com/anchore/syft/main/schema/json/schema-7.1.4.json"
}
}

View file

@ -40,6 +40,23 @@ func newPackageForIndexWithMetadata(name, version string, metadata pkg.PythonPip
return p
}
// newPackageForRequirementsWithMetadata builds the Python package record for a single
// requirements.txt entry, attaching the supplied requirements metadata and the locations
// at which the entry was found.
func newPackageForRequirementsWithMetadata(name, version string, metadata pkg.PythonRequirementsMetadata, locations ...source.Location) pkg.Package {
	var result pkg.Package

	result.Name = name
	result.Version = version
	result.Locations = source.NewLocationSet(locations...)
	result.PURL = packageURL(name, version, nil)
	result.Language = pkg.Python
	result.Type = pkg.PythonPkg
	result.MetadataType = pkg.PythonRequirementsMetadataType
	result.Metadata = metadata

	// the ID is assigned only after every field has been populated
	result.SetID()

	return result
}
func newPackageForPackage(m pkg.PythonPackageMetadata, sources ...source.Location) pkg.Package {
var licenses []string
if m.License != "" {

View file

@ -3,6 +3,7 @@ package python
import (
"bufio"
"fmt"
"regexp"
"strings"
"unicode"
@ -15,6 +16,11 @@ import (
var _ generic.Parser = parseRequirementsTxt
// Patterns for the parts of a requirements.txt line that are not part of the package name or version.
var (
// extrasRegex matches a bracketed span; the match is greedy, so it runs from the opening "[" to the
// last "]" on the line (e.g. "[redis, pytest]").
extrasRegex = regexp.MustCompile(`\[.*\]`)
// urlRegex matches from an "@" that is followed somewhere by "git" through the end of the line
// (e.g. "@ git+https://github.com/owner/repo@tag").
urlRegex = regexp.MustCompile("@.*git.*")
)
// parseRequirementsTxt takes a Python requirements.txt file, returning all Python packages that are locked to a
// specific version.
func parseRequirementsTxt(_ source.FileResolver, _ *generic.Environment, reader source.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
@ -23,6 +29,7 @@ func parseRequirementsTxt(_ source.FileResolver, _ *generic.Environment, reader
scanner := bufio.NewScanner(reader)
for scanner.Scan() {
line := scanner.Text()
rawLineNoComments := removeTrailingComment(line)
line = trimRequirementsTxtLine(line)
if line == "" {
@ -57,15 +64,25 @@ func parseRequirementsTxt(_ source.FileResolver, _ *generic.Environment, reader
return !unicode.IsLetter(r) && !unicode.IsNumber(r)
})
// TODO: Update to support more than only ==
versionConstraint := fmt.Sprintf("== %s", version)
if name == "" || version == "" {
log.WithFields("path", reader.RealPath).Debugf("found empty package in requirements.txt line: %q", line)
continue
}
packages = append(
packages,
newPackageForIndex(
newPackageForRequirementsWithMetadata(
name,
version,
pkg.PythonRequirementsMetadata{
Name: name,
Extras: parseExtras(rawLineNoComments),
VersionConstraint: versionConstraint,
URL: parseURL(rawLineNoComments),
Markers: parseMarkers(rawLineNoComments),
},
reader.Location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
),
)
@ -93,6 +110,7 @@ func trimRequirementsTxtLine(line string) string {
line = strings.TrimSpace(line)
line = removeTrailingComment(line)
line = removeEnvironmentMarkers(line)
line = checkForRegex(line) // remove extras and url from line if found
return line
}
@ -121,3 +139,84 @@ func removeEnvironmentMarkers(line string) string {
return parts[0]
}
// parseExtras returns the extras requested on a requirements line, i.e. the comma-separated
// names found inside square brackets. For example "celery[redis, pytest] == 4.4.7" yields
// ["redis", "pytest"].
//
// An empty, non-nil slice is returned when the line has no bracketed section or when the
// brackets contain no names (e.g. "pkg[]").
func parseExtras(packageName string) []string {
	extras := []string{}

	match := extrasRegex.FindString(packageName)
	if match == "" {
		return extras
	}

	// Remove square brackets (and any other leading/trailing non-alphanumeric characters)
	inner := strings.TrimFunc(match, func(r rune) bool {
		return !unicode.IsLetter(r) && !unicode.IsNumber(r)
	})

	// Remove any additional whitespace
	inner = strings.ReplaceAll(inner, " ", "")

	for _, extra := range strings.Split(inner, ",") {
		// strings.Split yields an empty element for an empty input and for doubled commas;
		// those are not extras and must not be reported as such.
		if extra != "" {
			extras = append(extras, extra)
		}
	}

	return extras
}
// parseMarkers extracts the environment markers that follow the first ";" on a requirements
// line and returns them keyed by marker name, with the rest of each clause (operator and
// value) as the map value. For example:
//
//	requests == 2.8 ; python_version < "2.7" and sys_platform == "linux"
//
// yields {"python_version": `< "2.7"`, "sys_platform": `== "linux"`}.
//
// Clauses are separated on " or ", " and " and ","; whether clauses were joined by "and" or
// "or" is not recorded, and parentheses are not interpreted. An empty, non-nil map is
// returned when the line carries no markers.
func parseMarkers(line string) map[string]string {
	markers := map[string]string{}

	parts := strings.SplitN(line, ";", 2)
	if len(parts) != 2 {
		return markers
	}

	// Normalise every clause separator to a comma so a single split handles all of them.
	clauses := parts[1]
	for _, joiner := range []string{" or ", " and "} {
		clauses = strings.ReplaceAll(clauses, joiner, ",")
	}

	for _, clause := range strings.Split(clauses, ",") {
		// Trim each clause individually: extra spacing around a separator, or a bare ";"
		// with nothing after it, would otherwise produce an entry with an empty key.
		clause = strings.TrimSpace(clause)
		if clause == "" {
			continue
		}

		// The marker name is everything up to the first space; the remainder is kept verbatim
		// (apart from surrounding whitespace) as the value.
		fields := strings.SplitN(clause, " ", 2)
		value := ""
		if len(fields) == 2 {
			value = strings.TrimSpace(fields[1])
		}
		markers[fields[0]] = value
	}

	return markers
}
// parseURL returns the git reference given after an "@" on a requirements line, or "" when
// there is none. For example
//
//	GithubSampleProject == 3.7.1 @ git+https://github.com/owner/repo@releases/tag/v3.7.1
//
// yields "git+https://github.com/owner/repo@releases/tag/v3.7.1". Only references that begin
// with "git" are recognised.
func parseURL(line string) string {
	parts := strings.Split(line, "@")

	// parts[0] is the text before the first "@" (the requirement itself), so the search starts
	// at index 1; otherwise a package whose name begins with "git" would be taken for the URL
	// and the whole line returned.
	for i := 1; i < len(parts); i++ {
		candidate := strings.TrimFunc(parts[i], func(r rune) bool {
			return !unicode.IsLetter(r) && !unicode.IsNumber(r)
		})
		if strings.HasPrefix(candidate, "git") {
			// The URL may itself contain "@" (e.g. repo@tag), so re-join everything from here on.
			return strings.TrimSpace(strings.Join(parts[i:], "@"))
		}
	}

	return ""
}
// checkForRegex removes every match of the known requirement decorations from stringToCheck
// and returns what is left: first the "@ ...git..." URL reference (urlRegex), then the
// bracketed extras (extrasRegex).
func checkForRegex(stringToCheck string) string {
	stringToReturn := stringToCheck
	for _, r := range []*regexp.Regexp{
		urlRegex,
		extrasRegex,
	} {
		// Each pattern is applied to the output of the previous one so that removals accumulate;
		// replacing against the original input would discard an earlier removal whenever more
		// than one pattern matches. ReplaceAllString returns its input unchanged when nothing
		// matches, so no separate MatchString guard is needed.
		stringToReturn = r.ReplaceAllString(stringToReturn, "")
	}
	return stringToReturn
}

View file

@ -20,6 +20,14 @@ func TestParseRequirementsTxt(t *testing.T) {
Locations: locations,
Language: pkg.Python,
Type: pkg.PythonPkg,
MetadataType: pkg.PythonRequirementsMetadataType,
Metadata: pkg.PythonRequirementsMetadata{
Name: "flask",
Extras: []string{},
VersionConstraint: "== 4.0.0",
URL: "",
Markers: map[string]string{},
},
},
{
Name: "foo",
@ -28,6 +36,14 @@ func TestParseRequirementsTxt(t *testing.T) {
Locations: locations,
Language: pkg.Python,
Type: pkg.PythonPkg,
MetadataType: pkg.PythonRequirementsMetadataType,
Metadata: pkg.PythonRequirementsMetadata{
Name: "foo",
Extras: []string{},
VersionConstraint: "== 1.0.0",
URL: "",
Markers: map[string]string{},
},
},
{
Name: "SomeProject",
@ -36,6 +52,14 @@ func TestParseRequirementsTxt(t *testing.T) {
Locations: locations,
Language: pkg.Python,
Type: pkg.PythonPkg,
MetadataType: pkg.PythonRequirementsMetadataType,
Metadata: pkg.PythonRequirementsMetadata{
Name: "SomeProject",
Extras: []string{},
VersionConstraint: "== 5.4",
URL: "",
Markers: map[string]string{"python_version": "< '3.8'"},
},
},
{
Name: "argh",
@ -44,6 +68,14 @@ func TestParseRequirementsTxt(t *testing.T) {
Locations: locations,
Language: pkg.Python,
Type: pkg.PythonPkg,
MetadataType: pkg.PythonRequirementsMetadataType,
Metadata: pkg.PythonRequirementsMetadata{
Name: "argh",
Extras: []string{},
VersionConstraint: "== 0.26.2",
URL: "",
Markers: map[string]string{},
},
},
{
Name: "argh",
@ -52,6 +84,65 @@ func TestParseRequirementsTxt(t *testing.T) {
Locations: locations,
Language: pkg.Python,
Type: pkg.PythonPkg,
MetadataType: pkg.PythonRequirementsMetadataType,
Metadata: pkg.PythonRequirementsMetadata{
Name: "argh",
Extras: []string{},
VersionConstraint: "== 0.26.3",
URL: "",
Markers: map[string]string{},
},
},
{
Name: "celery",
Version: "4.4.7",
PURL: "pkg:pypi/celery@4.4.7",
Locations: locations,
Language: pkg.Python,
Type: pkg.PythonPkg,
MetadataType: pkg.PythonRequirementsMetadataType,
Metadata: pkg.PythonRequirementsMetadata{
Name: "celery",
Extras: []string{"redis", "pytest"},
VersionConstraint: "== 4.4.7",
URL: "",
Markers: map[string]string{},
},
},
{
Name: "requests",
Version: "2.8",
PURL: "pkg:pypi/requests@2.8",
Locations: locations,
Language: pkg.Python,
Type: pkg.PythonPkg,
MetadataType: pkg.PythonRequirementsMetadataType,
Metadata: pkg.PythonRequirementsMetadata{
Name: "requests",
Extras: []string{"security"},
VersionConstraint: "== 2.8",
URL: "",
Markers: map[string]string{
"python_version": `< "2.7"`,
"sys_platform": `== "linux"`,
},
},
},
{
Name: "GithubSampleProject",
Version: "3.7.1",
PURL: "pkg:pypi/GithubSampleProject@3.7.1",
Locations: locations,
Language: pkg.Python,
Type: pkg.PythonPkg,
MetadataType: pkg.PythonRequirementsMetadataType,
Metadata: pkg.PythonRequirementsMetadata{
Name: "GithubSampleProject",
Extras: []string{},
VersionConstraint: "== 3.7.1",
URL: "git+https://github.com/owner/repo@releases/tag/v3.7.1",
Markers: map[string]string{},
},
},
}

View file

@ -17,3 +17,6 @@ argh==0.26.3 --hash=sha256:a9b3aaa1904eeb78e32394cd46c6f37ac0fb4af6dc488daa58971
# CommentedOut == 1.2.3
# maybe invalid, but found out in the wild
==2.3.4
celery[redis, pytest] == 4.4.7 # should remove [redis, pytest]
requests[security] == 2.8.* ; python_version < "2.7" and sys_platform == "linux"
GithubSampleProject == 3.7.1 @ git+https://github.com/owner/repo@releases/tag/v3.7.1

View file

@ -36,6 +36,7 @@ const (
PortageMetadataType MetadataType = "PortageMetadata"
PythonPackageMetadataType MetadataType = "PythonPackageMetadata"
PythonPipfileLockMetadataType MetadataType = "PythonPipfileLockMetadata"
PythonRequirementsMetadataType MetadataType = "PythonRequirementsMetadata"
RebarLockMetadataType MetadataType = "RebarLockMetadataType"
RpmMetadataType MetadataType = "RpmMetadata"
RustCargoPackageMetadataType MetadataType = "RustCargoPackageMetadata"
@ -67,6 +68,7 @@ var AllMetadataTypes = []MetadataType{
PortageMetadataType,
PythonPackageMetadataType,
PythonPipfileLockMetadataType,
PythonRequirementsMetadataType,
RebarLockMetadataType,
RpmMetadataType,
RustCargoPackageMetadataType,
@ -98,6 +100,7 @@ var MetadataTypeByName = map[MetadataType]reflect.Type{
PortageMetadataType: reflect.TypeOf(PortageMetadata{}),
PythonPackageMetadataType: reflect.TypeOf(PythonPackageMetadata{}),
PythonPipfileLockMetadataType: reflect.TypeOf(PythonPipfileLockMetadata{}),
PythonRequirementsMetadataType: reflect.TypeOf(PythonRequirementsMetadata{}),
RebarLockMetadataType: reflect.TypeOf(RebarLockMetadata{}),
RpmMetadataType: reflect.TypeOf(RpmMetadata{}),
RustCargoPackageMetadataType: reflect.TypeOf(CargoPackageMetadata{}),

View file

@ -0,0 +1,9 @@
package pkg
// PythonRequirementsMetadata holds the details parsed from a single requirements.txt entry.
type PythonRequirementsMetadata struct {
// Name is the package name, with any extras and URL removed.
Name string `json:"name" mapstruct:"Name"`
// Extras lists the names given in square brackets after the package name (e.g. "redis", "pytest").
Extras []string `json:"extras" mapstruct:"Extras"`
// VersionConstraint is the version requirement, currently always rendered as "== <version>".
VersionConstraint string `json:"versionConstraint" mapstruct:"VersionConstraint"`
// URL is the git reference that follows an "@" on the line; empty when none is given.
URL string `json:"url" mapstruct:"URL"`
// Markers holds the environment markers found after ";", keyed by marker name with the
// operator and value as the map value (e.g. "python_version": "< '3.8'").
Markers map[string]string `json:"markers" mapstruct:"Markers"`
}