mirror of
https://github.com/anchore/syft
synced 2024-11-10 06:14:16 +00:00
1577 spdxlicense generate (#1691)
Update license_list.go to accept more permissive inputs for greater SPDX ID matching. Example: GPL3, gpl3, gpl-3, and GPL-3 can all map to GPL-3.0-only. By lowercasing all strings and removing the "-", we're able to return a valid SPDX license ID for a greater diversity of input strings. --------- Signed-off-by: Christopher Phillips <christopher.phillips@anchore.com>
This commit is contained in:
parent
539bc2afcb
commit
f473bb75a8
10 changed files with 2457 additions and 2427 deletions
|
@ -11,8 +11,6 @@ import (
|
|||
"strings"
|
||||
"text/template"
|
||||
"time"
|
||||
|
||||
"github.com/scylladb/go-set/strset"
|
||||
)
|
||||
|
||||
// This program generates license_list.go.
|
||||
|
@ -35,7 +33,7 @@ var licenseIDs = map[string]string{
|
|||
}
|
||||
`))
|
||||
|
||||
var versionMatch = regexp.MustCompile(`-([0-9]+)\.?([0-9]+)?\.?([0-9]+)?\.?`)
|
||||
var versionMatch = regexp.MustCompile(`([0-9]+)\.?([0-9]+)?\.?([0-9]+)?\.?`)
|
||||
|
||||
func main() {
|
||||
if err := run(); err != nil {
|
||||
|
@ -49,7 +47,6 @@ func run() error {
|
|||
if err != nil {
|
||||
return fmt.Errorf("unable to get licenses list: %w", err)
|
||||
}
|
||||
|
||||
var result LicenseList
|
||||
if err = json.NewDecoder(resp.Body).Decode(&result); err != nil {
|
||||
return fmt.Errorf("unable to decode license list: %w", err)
|
||||
|
@ -103,58 +100,59 @@ func run() error {
|
|||
// The third pass is for overwriting deprecated licenses with replacements, for example GPL-2.0+ is deprecated
|
||||
// and now maps to GPL-2.0-or-later.
|
||||
func processSPDXLicense(result LicenseList) map[string]string {
|
||||
// first pass build map
|
||||
var licenseIDs = make(map[string]string)
|
||||
for _, l := range result.Licenses {
|
||||
cleanID := strings.ToLower(l.ID)
|
||||
if _, exists := licenseIDs[cleanID]; exists {
|
||||
log.Fatalf("duplicate license ID found: %q", cleanID)
|
||||
}
|
||||
licenseIDs[cleanID] = l.ID
|
||||
}
|
||||
|
||||
// The order of variations/permutations of a license ID matters because of how we shuffle its digits,
|
||||
// that is because the permutation code can generate the same value for two different licenses,
|
||||
// for example: The licenses `ABC-1.0` and `ABC-1.1` can both map to `ABC-1`,
|
||||
// so we need to guarantee the order they are created to avoid mapping them wrongly. So we use a sorted list.
|
||||
// To overwrite deprecated licenses during the first pass we would later on rely on map order,
|
||||
// [which in go is not consistent by design](https://stackoverflow.com/a/55925880).
|
||||
// The order of variations/permutations of a license ID matters.
|
||||
// The permutation code can generate the same value for two different licenses,
|
||||
// for example: The licenses `ABC-1.0` and `ABC-1.1` can both map to `ABC1`,
|
||||
// we need to guarantee the order they are created to avoid mapping them incorrectly.
|
||||
// To do this we use a sorted list.
|
||||
sort.Slice(result.Licenses, func(i, j int) bool {
|
||||
return result.Licenses[i].ID < result.Licenses[j].ID
|
||||
})
|
||||
|
||||
// second pass to build exceptions and replacements
|
||||
replaced := strset.New()
|
||||
// keys are simplified by removing dashes and lowercasing ID
|
||||
// this is so license declarations in the wild like: LGPL3 LGPL-3 lgpl3 and lgpl-3 can all match
|
||||
licenseIDs := make(map[string]string)
|
||||
for _, l := range result.Licenses {
|
||||
var multipleID []string
|
||||
cleanID := strings.ToLower(l.ID)
|
||||
// licensePerms includes the cleanID in return slice
|
||||
cleanID := cleanLicenseID(l.ID)
|
||||
licensePerms := buildLicenseIDPermutations(cleanID)
|
||||
|
||||
var replacement *License
|
||||
// if license is deprecated, find its replacement and add to licenseIDs
|
||||
if l.Deprecated {
|
||||
replacement = result.findReplacementLicense(l)
|
||||
idToMap := l.ID
|
||||
replacement := result.findReplacementLicense(l)
|
||||
if replacement != nil {
|
||||
licenseIDs[cleanID] = replacement.ID
|
||||
idToMap = replacement.ID
|
||||
}
|
||||
// it's important to use the original licensePerms here so that the deprecated license
|
||||
// can now point to the new correct license
|
||||
for _, id := range licensePerms {
|
||||
if _, exists := licenseIDs[id]; exists {
|
||||
// can be used to debug duplicate license permutations and confirm that examples like GPL1
|
||||
// do not point to GPL-1.1
|
||||
// log.Println("duplicate license list permutation found when mapping deprecated license to replacement")
|
||||
// log.Printf("already have key: %q for SPDX ID: %q; attempted to map replacement ID: %q for deprecated ID: %q\n", id, value, replacement.ID, l.ID)
|
||||
continue
|
||||
}
|
||||
licenseIDs[id] = idToMap
|
||||
}
|
||||
}
|
||||
|
||||
multipleID = append(multipleID, buildLicensePermutations(cleanID)...)
|
||||
for _, id := range multipleID {
|
||||
// don't make replacements for IDs that have already been replaced. Since we have a sorted license list
|
||||
// the earliest replacement is correct (any future replacements are not.
|
||||
// e.g. replace lgpl-2 with LGPL-2.1-only is wrong, but with LGPL-2.0-only is correct)
|
||||
if replacement == nil || replaced.Has(id) {
|
||||
if _, exists := licenseIDs[id]; !exists {
|
||||
licenseIDs[id] = l.ID
|
||||
}
|
||||
} else {
|
||||
// a useful debugging line during builds
|
||||
log.Printf("replacing %s with %s\n", id, replacement.ID)
|
||||
|
||||
licenseIDs[id] = replacement.ID
|
||||
replaced.Add(id)
|
||||
// if license is not deprecated, add all permutations to licenseIDs
|
||||
for _, id := range licensePerms {
|
||||
if _, exists := licenseIDs[id]; exists {
|
||||
// log.Println("found duplicate license permutation key for non deprecated license")
|
||||
// log.Printf("already have key: %q for SPDX ID: %q; tried to insert as SPDX ID:%q\n", id, value, l.ID)
|
||||
continue
|
||||
}
|
||||
licenseIDs[id] = l.ID
|
||||
}
|
||||
}
|
||||
|
||||
return licenseIDs
|
||||
}
|
||||
|
||||
func cleanLicenseID(id string) string {
|
||||
cleanID := strings.ToLower(id)
|
||||
return strings.ReplaceAll(cleanID, "-", "")
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -20,51 +20,7 @@ type License struct {
|
|||
SeeAlso []string `json:"seeAlso"`
|
||||
}
|
||||
|
||||
func (l License) canReplace(other License) bool {
|
||||
if l.Deprecated {
|
||||
return false
|
||||
}
|
||||
|
||||
// We want to replace deprecated licenses with non-deprecated counterparts
|
||||
// For more information, see: https://github.com/spdx/license-list-XML/issues/1676
|
||||
if other.Deprecated {
|
||||
switch {
|
||||
case strings.ReplaceAll(l.ID, "-only", "") == other.ID:
|
||||
return true
|
||||
case strings.ReplaceAll(l.ID, "-or-later", "+") == other.ID:
|
||||
return true
|
||||
case l.ID == "BSD-2-Clause" && other.ID == "BSD-2-Clause-NetBSD":
|
||||
return true
|
||||
case l.ID == "BSD-2-Clause-Views" && other.ID == "BSD-2-Clause-FreeBSD":
|
||||
return true
|
||||
case l.ID == "bzip2-1.0.6" && other.ID == "bzip2-1.0.5":
|
||||
return true
|
||||
case l.ID == "SMLNJ" && other.ID == "StandardML-NJ":
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
if l.Name != other.Name {
|
||||
return false
|
||||
}
|
||||
|
||||
if l.OSIApproved != other.OSIApproved {
|
||||
return false
|
||||
}
|
||||
|
||||
if len(l.SeeAlso) != len(other.SeeAlso) {
|
||||
return false
|
||||
}
|
||||
|
||||
for i, sa := range l.SeeAlso {
|
||||
if sa != other.SeeAlso[i] {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return l.ID == other.ID
|
||||
}
|
||||
|
||||
// findReplacementLicense returns a replacement license for a deprecated license
|
||||
func (ll LicenseList) findReplacementLicense(deprecated License) *License {
|
||||
for _, l := range ll.Licenses {
|
||||
if l.canReplace(deprecated) {
|
||||
|
@ -75,16 +31,62 @@ func (ll LicenseList) findReplacementLicense(deprecated License) *License {
|
|||
return nil
|
||||
}
|
||||
|
||||
func buildLicensePermutations(license string) (perms []string) {
|
||||
lv := findLicenseVersion(license)
|
||||
vp := versionPermutations(lv)
|
||||
|
||||
version := strings.Join(lv, ".")
|
||||
for _, p := range vp {
|
||||
perms = append(perms, strings.Replace(license, version, p, 1))
|
||||
func (l License) canReplace(deprecated License) bool {
|
||||
// don't replace a license with a deprecated license
|
||||
if l.Deprecated {
|
||||
return false
|
||||
}
|
||||
|
||||
return perms
|
||||
// We want to replace deprecated licenses with non-deprecated counterparts
|
||||
// For more information, see: https://github.com/spdx/license-list-XML/issues/1676
|
||||
switch {
|
||||
case strings.ReplaceAll(l.ID, "-only", "") == deprecated.ID:
|
||||
return true
|
||||
case strings.ReplaceAll(l.ID, "-or-later", "+") == deprecated.ID:
|
||||
return true
|
||||
case l.ID == "BSD-2-Clause" && deprecated.ID == "BSD-2-Clause-NetBSD":
|
||||
return true
|
||||
case l.ID == "BSD-2-Clause-Views" && deprecated.ID == "BSD-2-Clause-FreeBSD":
|
||||
return true
|
||||
case l.ID == "bzip2-1.0.6" && deprecated.ID == "bzip2-1.0.5":
|
||||
return true
|
||||
case l.ID == "SMLNJ" && deprecated.ID == "StandardML-NJ":
|
||||
return true
|
||||
}
|
||||
|
||||
if l.Name != deprecated.Name {
|
||||
return false
|
||||
}
|
||||
|
||||
if l.OSIApproved != deprecated.OSIApproved {
|
||||
return false
|
||||
}
|
||||
|
||||
if len(l.SeeAlso) != len(deprecated.SeeAlso) {
|
||||
return false
|
||||
}
|
||||
|
||||
for i, sa := range l.SeeAlso {
|
||||
if sa != deprecated.SeeAlso[i] {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return l.ID == deprecated.ID
|
||||
}
|
||||
|
||||
func buildLicenseIDPermutations(cleanID string) (perms []string) {
|
||||
lv := findLicenseVersion(cleanID)
|
||||
vp := versionPermutations(lv)
|
||||
|
||||
permSet := strset.New()
|
||||
version := strings.Join(lv, ".")
|
||||
for _, p := range vp {
|
||||
permSet.Add(strings.Replace(cleanID, version, p, 1))
|
||||
}
|
||||
|
||||
permSet.Add(cleanID)
|
||||
return permSet.List()
|
||||
}
|
||||
|
||||
func findLicenseVersion(license string) (version []string) {
|
||||
|
|
|
@ -67,56 +67,57 @@ func TestLicensePermutations(t *testing.T) {
|
|||
{
|
||||
"GPL-1-only",
|
||||
[]string{
|
||||
"GPL-1-only",
|
||||
"GPL-1.0-only",
|
||||
"GPL-1.0.0-only",
|
||||
"gpl1only",
|
||||
"gpl1.0only",
|
||||
"gpl1.0.0only",
|
||||
},
|
||||
},
|
||||
{
|
||||
"GPL-2",
|
||||
[]string{
|
||||
"GPL-2",
|
||||
"GPL-2.0",
|
||||
"GPL-2.0.0",
|
||||
"gpl2",
|
||||
"gpl2.0",
|
||||
"gpl2.0.0",
|
||||
},
|
||||
},
|
||||
{
|
||||
"GPL-2.0+",
|
||||
[]string{
|
||||
"GPL-2+",
|
||||
"GPL-2.0+",
|
||||
"GPL-2.0.0+",
|
||||
"gpl2+",
|
||||
"gpl2.0+",
|
||||
"gpl2.0.0+",
|
||||
},
|
||||
},
|
||||
{
|
||||
"GPL-3.0.0-or-later",
|
||||
[]string{
|
||||
"GPL-3-or-later",
|
||||
"GPL-3.0-or-later",
|
||||
"GPL-3.0.0-or-later",
|
||||
"gpl3orlater",
|
||||
"gpl3.0orlater",
|
||||
"gpl3.0.0orlater",
|
||||
},
|
||||
},
|
||||
{
|
||||
"abc-1.1",
|
||||
[]string{
|
||||
"abc-1",
|
||||
"abc-1.1",
|
||||
"abc-1.1.0",
|
||||
"abc1",
|
||||
"abc1.1",
|
||||
"abc1.1.0",
|
||||
},
|
||||
},
|
||||
{
|
||||
"oldap-2.0",
|
||||
[]string{
|
||||
"oldap-2",
|
||||
"oldap-2.0",
|
||||
"oldap-2.0.0",
|
||||
"oldap2",
|
||||
"oldap2.0",
|
||||
"oldap2.0.0",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.shortName, func(t *testing.T) {
|
||||
perms := buildLicensePermutations(test.shortName)
|
||||
cleanID := cleanLicenseID(test.shortName)
|
||||
perms := buildLicenseIDPermutations(cleanID)
|
||||
assert.ElementsMatch(t, test.permutations, perms)
|
||||
})
|
||||
}
|
||||
|
@ -183,10 +184,6 @@ func TestFindLicenseVersion(t *testing.T) {
|
|||
"GPL-2",
|
||||
[]string{"2"},
|
||||
},
|
||||
{
|
||||
"bzip2-1",
|
||||
[]string{"1"},
|
||||
},
|
||||
{
|
||||
"php-3.01",
|
||||
[]string{"3", "01"},
|
||||
|
|
|
@ -18,16 +18,20 @@ const (
|
|||
|
||||
//go:generate go run ./generate
|
||||
|
||||
func ID(id string) (value, other string, exists bool) {
|
||||
id = strings.TrimSpace(id)
|
||||
// ignore blank strings or the joiner
|
||||
if id == "" || id == "AND" {
|
||||
return "", "", false
|
||||
}
|
||||
// ID returns the canonical license ID for the given license ID
|
||||
// Note: this function is only concerned with returning a best match of an SPDX license ID
|
||||
// SPDX Expressions will be handled by a parent package which will call this function
|
||||
func ID(id string) (value string, exists bool) {
|
||||
// first look for a canonical license
|
||||
if value, exists := licenseIDs[strings.ToLower(id)]; exists {
|
||||
return value, "", exists
|
||||
if value, exists := licenseIDs[cleanLicenseID(id)]; exists {
|
||||
return value, exists
|
||||
}
|
||||
// we did not find, so treat it as a separate license
|
||||
return "", id, true
|
||||
return "", false
|
||||
}
|
||||
|
||||
func cleanLicenseID(id string) string {
|
||||
id = strings.TrimSpace(id)
|
||||
id = strings.ToLower(id)
|
||||
return strings.ReplaceAll(id, "-", "")
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -9,8 +9,8 @@ import (
|
|||
func TestLicenceListIDs(t *testing.T) {
|
||||
// do a sanity check on the generated data
|
||||
assert.Equal(t, "0BSD", licenseIDs["0bsd"])
|
||||
assert.Equal(t, "ZPL-2.1", licenseIDs["zpl-2.1"])
|
||||
assert.Equal(t, "GPL-2.0-only", licenseIDs["gpl-2"])
|
||||
assert.Equal(t, "GPL-2.0-or-later", licenseIDs["gpl-2+"])
|
||||
assert.Equal(t, "ZPL-2.1", licenseIDs["zpl2.1"])
|
||||
assert.Equal(t, "GPL-2.0-only", licenseIDs["gpl2"])
|
||||
assert.Equal(t, "GPL-2.0-or-later", licenseIDs["gpl2+"])
|
||||
assert.NotEmpty(t, Version)
|
||||
}
|
||||
|
|
|
@ -6,83 +6,70 @@ import (
|
|||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestIDParse(t *testing.T) {
|
||||
func TestSPDXIDRecognition(t *testing.T) {
|
||||
var tests = []struct {
|
||||
shortName string
|
||||
id string
|
||||
other string
|
||||
found bool
|
||||
}{
|
||||
{
|
||||
"GPL-1-only",
|
||||
"GPL-1.0-only",
|
||||
"",
|
||||
true,
|
||||
},
|
||||
{
|
||||
"gpl1",
|
||||
"GPL-1.0-only",
|
||||
true,
|
||||
},
|
||||
{
|
||||
"gpl-1",
|
||||
"GPL-1.0-only",
|
||||
true,
|
||||
},
|
||||
{
|
||||
"GPL-2",
|
||||
"GPL-2.0-only",
|
||||
"",
|
||||
true,
|
||||
},
|
||||
{
|
||||
"GPL-2+",
|
||||
"GPL-2.0-or-later",
|
||||
"",
|
||||
true,
|
||||
},
|
||||
{
|
||||
"GPL-3.0.0-or-later",
|
||||
"GPL-3.0-or-later",
|
||||
"",
|
||||
true,
|
||||
},
|
||||
{
|
||||
"GPL-3-with-autoconf-exception",
|
||||
"GPL-3.0-with-autoconf-exception",
|
||||
"",
|
||||
true,
|
||||
},
|
||||
{
|
||||
"CC-by-nc-3-de",
|
||||
"CC-BY-NC-3.0-DE",
|
||||
"",
|
||||
true,
|
||||
},
|
||||
// the below few cases are NOT expected, however, seem unavoidable given the current approach
|
||||
{
|
||||
"w3c-20150513.0.0",
|
||||
"W3C-20150513",
|
||||
"",
|
||||
true,
|
||||
},
|
||||
{
|
||||
"spencer-86.0.0",
|
||||
"Spencer-86",
|
||||
"",
|
||||
true,
|
||||
},
|
||||
{
|
||||
"unicode-dfs-2015.0.0",
|
||||
"Unicode-DFS-2015",
|
||||
"",
|
||||
true,
|
||||
},
|
||||
{
|
||||
"Unknown",
|
||||
"",
|
||||
"Unknown",
|
||||
true,
|
||||
},
|
||||
{
|
||||
" ",
|
||||
"",
|
||||
"",
|
||||
false,
|
||||
},
|
||||
{
|
||||
"AND",
|
||||
"",
|
||||
" ",
|
||||
"",
|
||||
false,
|
||||
},
|
||||
|
@ -90,10 +77,9 @@ func TestIDParse(t *testing.T) {
|
|||
|
||||
for _, test := range tests {
|
||||
t.Run(test.shortName, func(t *testing.T) {
|
||||
value, other, exists := ID(test.shortName)
|
||||
value, exists := ID(test.shortName)
|
||||
assert.Equal(t, test.found, exists)
|
||||
assert.Equal(t, test.id, value)
|
||||
assert.Equal(t, test.other, other)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
@ -10,14 +10,21 @@ import (
|
|||
func encodeLicenses(p pkg.Package) *cyclonedx.Licenses {
|
||||
lc := cyclonedx.Licenses{}
|
||||
for _, licenseName := range p.Licenses {
|
||||
if value, other, exists := spdxlicense.ID(licenseName); exists {
|
||||
if value, exists := spdxlicense.ID(licenseName); exists {
|
||||
lc = append(lc, cyclonedx.LicenseChoice{
|
||||
License: &cyclonedx.License{
|
||||
ID: value,
|
||||
Name: other,
|
||||
ID: value,
|
||||
},
|
||||
})
|
||||
continue
|
||||
}
|
||||
|
||||
// not found so append the licenseName as is
|
||||
lc = append(lc, cyclonedx.LicenseChoice{
|
||||
License: &cyclonedx.License{
|
||||
Name: licenseName,
|
||||
},
|
||||
})
|
||||
}
|
||||
if len(lc) > 0 {
|
||||
return &lc
|
||||
|
|
|
@ -39,12 +39,12 @@ func License(p pkg.Package) string {
|
|||
|
||||
func parseLicenses(raw []string) (parsedLicenses []string) {
|
||||
for _, l := range raw {
|
||||
if value, other, exists := spdxlicense.ID(l); exists {
|
||||
parsed := value
|
||||
if other != "" {
|
||||
parsed = spdxlicense.LicenseRefPrefix + other
|
||||
}
|
||||
parsedLicenses = append(parsedLicenses, parsed)
|
||||
if value, exists := spdxlicense.ID(l); exists {
|
||||
parsedLicenses = append(parsedLicenses, value)
|
||||
} else {
|
||||
// we did not find a valid SPDX license ID so treat as separate license
|
||||
otherLicense := spdxlicense.LicenseRefPrefix + l
|
||||
parsedLicenses = append(parsedLicenses, otherLicense)
|
||||
}
|
||||
}
|
||||
return
|
||||
|
|
Loading…
Reference in a new issue