1577 spdxlicense generate (#1691)

Update license_list.go to accept more permissive inputs for broader SPDX ID matching.
Example:
GPL3, gpl3, gpl-3, and GPL-3 can all map to GPL-3.0-only.

By lowercasing all strings and removing "-", we are able to return a valid SPDX license ID for a greater diversity of input strings.
---------

Signed-off-by: Christopher Phillips <christopher.phillips@anchore.com>
This commit is contained in:
Christopher Angelo Phillips 2023-03-23 11:48:24 -04:00 committed by GitHub
parent 539bc2afcb
commit f473bb75a8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 2457 additions and 2427 deletions

View file

@ -11,8 +11,6 @@ import (
"strings"
"text/template"
"time"
"github.com/scylladb/go-set/strset"
)
// This program generates license_list.go.
@ -35,7 +33,7 @@ var licenseIDs = map[string]string{
}
`))
var versionMatch = regexp.MustCompile(`-([0-9]+)\.?([0-9]+)?\.?([0-9]+)?\.?`)
var versionMatch = regexp.MustCompile(`([0-9]+)\.?([0-9]+)?\.?([0-9]+)?\.?`)
func main() {
if err := run(); err != nil {
@ -49,7 +47,6 @@ func run() error {
if err != nil {
return fmt.Errorf("unable to get licenses list: %w", err)
}
var result LicenseList
if err = json.NewDecoder(resp.Body).Decode(&result); err != nil {
return fmt.Errorf("unable to decode license list: %w", err)
@ -103,58 +100,59 @@ func run() error {
// The third pass is for overwriting deprecated licenses with replacements, for example GPL-2.0+ is deprecated
// and now maps to GPL-2.0-or-later.
func processSPDXLicense(result LicenseList) map[string]string {
// first pass build map
var licenseIDs = make(map[string]string)
for _, l := range result.Licenses {
cleanID := strings.ToLower(l.ID)
if _, exists := licenseIDs[cleanID]; exists {
log.Fatalf("duplicate license ID found: %q", cleanID)
}
licenseIDs[cleanID] = l.ID
}
// The order of variations/permutations of a license ID matters because of how we shuffle its digits,
// that is because the permutation code can generate the same value for two different licenses,
// for example: The licenses `ABC-1.0` and `ABC-1.1` can both map to `ABC-1`,
// so we need to guarantee the order they are created to avoid mapping them wrongly. So we use a sorted list.
// To overwrite deprecated licenses during the first pass we would later on rely on map order,
// [which in go is not consistent by design](https://stackoverflow.com/a/55925880).
// The order of variations/permutations of a license ID matters.
// The permutation code can generate the same value for two different licenses,
// for example: The licenses `ABC-1.0` and `ABC-1.1` can both map to `ABC1`,
// we need to guarantee the order they are created to avoid mapping them incorrectly.
// To do this we use a sorted list.
sort.Slice(result.Licenses, func(i, j int) bool {
return result.Licenses[i].ID < result.Licenses[j].ID
})
// second pass to build exceptions and replacements
replaced := strset.New()
// keys are simplified by removing dashes and lowercasing ID
// this is so license declarations in the wild like: LGPL3 LGPL-3 lgpl3 and lgpl-3 can all match
licenseIDs := make(map[string]string)
for _, l := range result.Licenses {
var multipleID []string
cleanID := strings.ToLower(l.ID)
// licensePerms includes the cleanID in return slice
cleanID := cleanLicenseID(l.ID)
licensePerms := buildLicenseIDPermutations(cleanID)
var replacement *License
// if license is deprecated, find its replacement and add to licenseIDs
if l.Deprecated {
replacement = result.findReplacementLicense(l)
idToMap := l.ID
replacement := result.findReplacementLicense(l)
if replacement != nil {
licenseIDs[cleanID] = replacement.ID
idToMap = replacement.ID
}
// it's important to use the original licensePerms here so that the deprecated license
// can now point to the new correct license
for _, id := range licensePerms {
if _, exists := licenseIDs[id]; exists {
// can be used to debug duplicate license permutations and confirm that examples like GPL1
// do not point to GPL-1.1
// log.Println("duplicate license list permutation found when mapping deprecated license to replacement")
// log.Printf("already have key: %q for SPDX ID: %q; attempted to map replacement ID: %q for deprecated ID: %q\n", id, value, replacement.ID, l.ID)
continue
}
licenseIDs[id] = idToMap
}
}
multipleID = append(multipleID, buildLicensePermutations(cleanID)...)
for _, id := range multipleID {
// don't make replacements for IDs that have already been replaced. Since we have a sorted license list
// the earliest replacement is correct (any future replacements are not;
// e.g. replacing lgpl-2 with LGPL-2.1-only is wrong, but with LGPL-2.0-only is correct).
if replacement == nil || replaced.Has(id) {
if _, exists := licenseIDs[id]; !exists {
licenseIDs[id] = l.ID
}
} else {
// a useful debugging line during builds
log.Printf("replacing %s with %s\n", id, replacement.ID)
licenseIDs[id] = replacement.ID
replaced.Add(id)
// if license is not deprecated, add all permutations to licenseIDs
for _, id := range licensePerms {
if _, exists := licenseIDs[id]; exists {
// log.Println("found duplicate license permutation key for non deprecated license")
// log.Printf("already have key: %q for SPDX ID: %q; tried to insert as SPDX ID:%q\n", id, value, l.ID)
continue
}
licenseIDs[id] = l.ID
}
}
return licenseIDs
}
// cleanLicenseID turns a license ID into a simplified lookup key by
// lowercasing it and removing every dash, so that spellings such as
// "GPL-3", "gpl-3" and "gpl3" all collapse to the same key ("gpl3").
func cleanLicenseID(id string) string {
	return strings.ReplaceAll(strings.ToLower(id), "-", "")
}

File diff suppressed because it is too large Load diff

View file

@ -20,51 +20,7 @@ type License struct {
SeeAlso []string `json:"seeAlso"`
}
// canReplace reports whether l may stand in for other.
//
// A deprecated l never replaces anything. When other is deprecated, l is
// accepted if its ID equals other.ID once the "-only" suffix is dropped or
// "-or-later" is rewritten to "+", or if the pair is one of the hard-coded
// renames below. In every remaining case l is accepted only when ID, Name,
// OSIApproved and the SeeAlso entries (in order) all match other's.
func (l License) canReplace(other License) bool {
	if l.Deprecated {
		return false
	}

	// We want to replace deprecated licenses with non-deprecated counterparts
	// For more information, see: https://github.com/spdx/license-list-XML/issues/1676
	if other.Deprecated {
		withoutOnly := strings.ReplaceAll(l.ID, "-only", "")
		withPlus := strings.ReplaceAll(l.ID, "-or-later", "+")
		if withoutOnly == other.ID || withPlus == other.ID {
			return true
		}

		// deprecated ID -> ID of the license that supersedes it
		renamed := map[string]string{
			"BSD-2-Clause-NetBSD":  "BSD-2-Clause",
			"BSD-2-Clause-FreeBSD": "BSD-2-Clause-Views",
			"bzip2-1.0.5":          "bzip2-1.0.6",
			"StandardML-NJ":        "SMLNJ",
		}
		if successor, known := renamed[other.ID]; known && successor == l.ID {
			return true
		}
	}

	sameHeader := l.ID == other.ID &&
		l.Name == other.Name &&
		l.OSIApproved == other.OSIApproved &&
		len(l.SeeAlso) == len(other.SeeAlso)
	if !sameHeader {
		return false
	}

	for idx := range l.SeeAlso {
		if l.SeeAlso[idx] != other.SeeAlso[idx] {
			return false
		}
	}
	return true
}
// findReplacementLicense returns a replacement license for a deprecated license
func (ll LicenseList) findReplacementLicense(deprecated License) *License {
for _, l := range ll.Licenses {
if l.canReplace(deprecated) {
@ -75,16 +31,62 @@ func (ll LicenseList) findReplacementLicense(deprecated License) *License {
return nil
}
func buildLicensePermutations(license string) (perms []string) {
lv := findLicenseVersion(license)
vp := versionPermutations(lv)
version := strings.Join(lv, ".")
for _, p := range vp {
perms = append(perms, strings.Replace(license, version, p, 1))
func (l License) canReplace(deprecated License) bool {
// don't replace a license with a deprecated license
if l.Deprecated {
return false
}
return perms
// We want to replace deprecated licenses with non-deprecated counterparts
// For more information, see: https://github.com/spdx/license-list-XML/issues/1676
switch {
case strings.ReplaceAll(l.ID, "-only", "") == deprecated.ID:
return true
case strings.ReplaceAll(l.ID, "-or-later", "+") == deprecated.ID:
return true
case l.ID == "BSD-2-Clause" && deprecated.ID == "BSD-2-Clause-NetBSD":
return true
case l.ID == "BSD-2-Clause-Views" && deprecated.ID == "BSD-2-Clause-FreeBSD":
return true
case l.ID == "bzip2-1.0.6" && deprecated.ID == "bzip2-1.0.5":
return true
case l.ID == "SMLNJ" && deprecated.ID == "StandardML-NJ":
return true
}
if l.Name != deprecated.Name {
return false
}
if l.OSIApproved != deprecated.OSIApproved {
return false
}
if len(l.SeeAlso) != len(deprecated.SeeAlso) {
return false
}
for i, sa := range l.SeeAlso {
if sa != deprecated.SeeAlso[i] {
return false
}
}
return l.ID == deprecated.ID
}
// buildLicenseIDPermutations returns the lookup keys derived from cleanID:
// cleanID itself plus, for every version spelling returned by
// versionPermutations, cleanID with the first occurrence of its dot-joined
// version replaced by that spelling. Duplicate keys are collapsed.
// NOTE(review): the result is read back from a set, so presumably the element
// order is unspecified; confirm before relying on it.
func buildLicenseIDPermutations(cleanID string) (perms []string) {
	versionParts := findLicenseVersion(cleanID)
	joinedVersion := strings.Join(versionParts, ".")

	keys := strset.New()
	keys.Add(cleanID)
	for _, spelling := range versionPermutations(versionParts) {
		keys.Add(strings.Replace(cleanID, joinedVersion, spelling, 1))
	}

	return keys.List()
}
func findLicenseVersion(license string) (version []string) {

View file

@ -67,56 +67,57 @@ func TestLicensePermutations(t *testing.T) {
{
"GPL-1-only",
[]string{
"GPL-1-only",
"GPL-1.0-only",
"GPL-1.0.0-only",
"gpl1only",
"gpl1.0only",
"gpl1.0.0only",
},
},
{
"GPL-2",
[]string{
"GPL-2",
"GPL-2.0",
"GPL-2.0.0",
"gpl2",
"gpl2.0",
"gpl2.0.0",
},
},
{
"GPL-2.0+",
[]string{
"GPL-2+",
"GPL-2.0+",
"GPL-2.0.0+",
"gpl2+",
"gpl2.0+",
"gpl2.0.0+",
},
},
{
"GPL-3.0.0-or-later",
[]string{
"GPL-3-or-later",
"GPL-3.0-or-later",
"GPL-3.0.0-or-later",
"gpl3orlater",
"gpl3.0orlater",
"gpl3.0.0orlater",
},
},
{
"abc-1.1",
[]string{
"abc-1",
"abc-1.1",
"abc-1.1.0",
"abc1",
"abc1.1",
"abc1.1.0",
},
},
{
"oldap-2.0",
[]string{
"oldap-2",
"oldap-2.0",
"oldap-2.0.0",
"oldap2",
"oldap2.0",
"oldap2.0.0",
},
},
}
for _, test := range tests {
t.Run(test.shortName, func(t *testing.T) {
perms := buildLicensePermutations(test.shortName)
cleanID := cleanLicenseID(test.shortName)
perms := buildLicenseIDPermutations(cleanID)
assert.ElementsMatch(t, test.permutations, perms)
})
}
@ -183,10 +184,6 @@ func TestFindLicenseVersion(t *testing.T) {
"GPL-2",
[]string{"2"},
},
{
"bzip2-1",
[]string{"1"},
},
{
"php-3.01",
[]string{"3", "01"},

View file

@ -18,16 +18,20 @@ const (
//go:generate go run ./generate
func ID(id string) (value, other string, exists bool) {
id = strings.TrimSpace(id)
// ignore blank strings or the joiner
if id == "" || id == "AND" {
return "", "", false
}
// ID returns the canonical license ID for the given license ID
// Note: this function is only concerned with returning a best match of an SPDX license ID
// SPDX Expressions will be handled by a parent package which will call this function
func ID(id string) (value string, exists bool) {
// first look for a canonical license
if value, exists := licenseIDs[strings.ToLower(id)]; exists {
return value, "", exists
if value, exists := licenseIDs[cleanLicenseID(id)]; exists {
return value, exists
}
// we did not find, so treat it as a separate license
return "", id, true
return "", false
}
// cleanLicenseID converts a user-supplied license string into the simplified
// key form used by the licenseIDs lookup: surrounding whitespace is trimmed,
// the text is lowercased, and every dash is removed.
func cleanLicenseID(id string) string {
	trimmed := strings.TrimSpace(id)
	return strings.ReplaceAll(strings.ToLower(trimmed), "-", "")
}

File diff suppressed because it is too large Load diff

View file

@ -9,8 +9,8 @@ import (
func TestLicenceListIDs(t *testing.T) {
// do a sanity check on the generated data
assert.Equal(t, "0BSD", licenseIDs["0bsd"])
assert.Equal(t, "ZPL-2.1", licenseIDs["zpl-2.1"])
assert.Equal(t, "GPL-2.0-only", licenseIDs["gpl-2"])
assert.Equal(t, "GPL-2.0-or-later", licenseIDs["gpl-2+"])
assert.Equal(t, "ZPL-2.1", licenseIDs["zpl2.1"])
assert.Equal(t, "GPL-2.0-only", licenseIDs["gpl2"])
assert.Equal(t, "GPL-2.0-or-later", licenseIDs["gpl2+"])
assert.NotEmpty(t, Version)
}

View file

@ -6,83 +6,70 @@ import (
"github.com/stretchr/testify/assert"
)
func TestIDParse(t *testing.T) {
func TestSPDXIDRecognition(t *testing.T) {
var tests = []struct {
shortName string
id string
other string
found bool
}{
{
"GPL-1-only",
"GPL-1.0-only",
"",
true,
},
{
"gpl1",
"GPL-1.0-only",
true,
},
{
"gpl-1",
"GPL-1.0-only",
true,
},
{
"GPL-2",
"GPL-2.0-only",
"",
true,
},
{
"GPL-2+",
"GPL-2.0-or-later",
"",
true,
},
{
"GPL-3.0.0-or-later",
"GPL-3.0-or-later",
"",
true,
},
{
"GPL-3-with-autoconf-exception",
"GPL-3.0-with-autoconf-exception",
"",
true,
},
{
"CC-by-nc-3-de",
"CC-BY-NC-3.0-DE",
"",
true,
},
// the below few cases are NOT expected, however, seem unavoidable given the current approach
{
"w3c-20150513.0.0",
"W3C-20150513",
"",
true,
},
{
"spencer-86.0.0",
"Spencer-86",
"",
true,
},
{
"unicode-dfs-2015.0.0",
"Unicode-DFS-2015",
"",
true,
},
{
"Unknown",
"",
"Unknown",
true,
},
{
" ",
"",
"",
false,
},
{
"AND",
"",
" ",
"",
false,
},
@ -90,10 +77,9 @@ func TestIDParse(t *testing.T) {
for _, test := range tests {
t.Run(test.shortName, func(t *testing.T) {
value, other, exists := ID(test.shortName)
value, exists := ID(test.shortName)
assert.Equal(t, test.found, exists)
assert.Equal(t, test.id, value)
assert.Equal(t, test.other, other)
})
}
}

View file

@ -10,14 +10,21 @@ import (
func encodeLicenses(p pkg.Package) *cyclonedx.Licenses {
lc := cyclonedx.Licenses{}
for _, licenseName := range p.Licenses {
if value, other, exists := spdxlicense.ID(licenseName); exists {
if value, exists := spdxlicense.ID(licenseName); exists {
lc = append(lc, cyclonedx.LicenseChoice{
License: &cyclonedx.License{
ID: value,
Name: other,
ID: value,
},
})
continue
}
// not found so append the licenseName as is
lc = append(lc, cyclonedx.LicenseChoice{
License: &cyclonedx.License{
Name: licenseName,
},
})
}
if len(lc) > 0 {
return &lc

View file

@ -39,12 +39,12 @@ func License(p pkg.Package) string {
func parseLicenses(raw []string) (parsedLicenses []string) {
for _, l := range raw {
if value, other, exists := spdxlicense.ID(l); exists {
parsed := value
if other != "" {
parsed = spdxlicense.LicenseRefPrefix + other
}
parsedLicenses = append(parsedLicenses, parsed)
if value, exists := spdxlicense.ID(l); exists {
parsedLicenses = append(parsedLicenses, value)
} else {
// we did not find a valid SPDX license ID so treat as separate license
otherLicense := spdxlicense.LicenseRefPrefix + l
parsedLicenses = append(parsedLicenses, otherLicense)
}
}
return