Overwrite deprecated SPDX licenses automatically (#1009)

Co-authored-by: Alex Goodman <alex.goodman@anchore.com>
This commit is contained in:
Jonas Xavier 2022-08-02 12:25:33 -07:00 committed by GitHub
parent e68f384063
commit 69fb0a6f3b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 7664 additions and 232 deletions

View file

@ -49,7 +49,7 @@ func Test_encodeLicense(t *testing.T) {
},
expected: &cyclonedx.Licenses{
{License: &cyclonedx.License{ID: "MIT"}},
{License: &cyclonedx.License{ID: "GPL-3.0"}},
{License: &cyclonedx.License{ID: "GPL-3.0-only"}},
},
},
{
@ -60,7 +60,7 @@ func Test_encodeLicense(t *testing.T) {
},
},
expected: &cyclonedx.Licenses{
{License: &cyclonedx.License{ID: "GPL-3.0"}},
{License: &cyclonedx.License{ID: "GPL-3.0-only"}},
},
},
{
@ -71,7 +71,7 @@ func Test_encodeLicense(t *testing.T) {
},
},
expected: &cyclonedx.Licenses{
{License: &cyclonedx.License{ID: "GPL-2.0"}},
{License: &cyclonedx.License{ID: "GPL-2.0-only"}},
},
},
}

View file

@ -44,7 +44,7 @@ func Test_License(t *testing.T) {
"GPL-3.0",
},
},
expected: "MIT AND GPL-3.0",
expected: "MIT AND GPL-3.0-only",
},
{
name: "cap insensitive",
@ -53,7 +53,7 @@ func Test_License(t *testing.T) {
"gpl-3.0",
},
},
expected: "GPL-3.0",
expected: "GPL-3.0-only",
},
{
name: "debian to spdx conversion",
@ -62,7 +62,7 @@ func Test_License(t *testing.T) {
"GPL-2",
},
},
expected: "GPL-2.0",
expected: "GPL-2.0-only",
},
}
for _, test := range tests {

View file

@ -37,18 +37,6 @@ var licenseIDs = map[string]string{
var versionMatch = regexp.MustCompile(`-([0-9]+)\.?([0-9]+)?\.?([0-9]+)?\.?`)
// LicenseList mirrors the parts of the SPDX license list JSON document that this generator reads.
type LicenseList struct {
// Version is decoded from the "licenseListVersion" key.
Version string `json:"licenseListVersion"`
// Licenses holds one entry per element of the "licenses" array.
Licenses []struct {
ID string `json:"licenseId"`
Name string `json:"name"`
Text string `json:"licenseText"`
Deprecated bool `json:"isDeprecatedLicenseId"`
OSIApproved bool `json:"isOsiApproved"`
SeeAlso []string `json:"seeAlso"`
} `json:"licenses"`
}
func main() {
if err := run(); err != nil {
fmt.Println(err.Error())
@ -102,8 +90,9 @@ func run() error {
return nil
}
// Parsing the provided SPDX license list necessitates a two pass approach.
// The first pass is only related to what SPDX considers the truth. These K:V pairs will never be overwritten.
// Parsing the provided SPDX license list necessitates a three pass approach.
// The first pass is only related to what SPDX considers the truth. We use license info to
// find replacements for deprecated licenses.
// The second pass attempts to generate known short/long version listings for each key.
// For info on some short name conventions see this document:
// https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/#license-short-name.
@ -111,6 +100,8 @@ func run() error {
// The new keys are then also associated with their relative SPDX value. If a key has already been entered
// we know to ignore it since it came from the first pass which is considered the SPDX source of truth.
// We also sort the licenses for the second pass so that cases like `GPL-1` associate to `GPL-1.0` and not `GPL-1.1`.
// The third pass is for overwriting deprecated licenses with replacements, for example GPL-2.0+ is deprecated
// and now maps to GPL-2.0-or-later.
func processSPDXLicense(result LicenseList) map[string]string {
// first pass build map
var licenseIDs = make(map[string]string)
@ -122,69 +113,48 @@ func processSPDXLicense(result LicenseList) map[string]string {
licenseIDs[cleanID] = l.ID
}
// The order of variations/permutations of a license ID matters because of how we shuffle its digits:
// the permutation code can generate the same value for two different licenses,
// for example: The licenses `ABC-1.0` and `ABC-1.1` can both map to `ABC-1`,
// so we need to guarantee the order they are created to avoid mapping them wrongly. So we use a sorted list.
// To overwrite deprecated licenses during the first pass we would later on rely on map order,
// [which in go is not consistent by design](https://stackoverflow.com/a/55925880).
sort.Slice(result.Licenses, func(i, j int) bool {
return result.Licenses[i].ID < result.Licenses[j].ID
})
// second pass build exceptions
// do not overwrite if already exists
// second pass to build exceptions and replacements
replaced := strset.New()
for _, l := range result.Licenses {
var multipleID []string
cleanID := strings.ToLower(l.ID)
var replacement *License
if l.Deprecated {
replacement = result.findReplacementLicense(l)
if replacement != nil {
licenseIDs[cleanID] = replacement.ID
}
}
multipleID = append(multipleID, buildLicensePermutations(cleanID)...)
for _, id := range multipleID {
if _, exists := licenseIDs[id]; !exists {
licenseIDs[id] = l.ID
// don't make replacements for IDs that have already been replaced. Since we have a sorted license list
// the earliest replacement is correct and any later replacement is not
// (e.g. replacing lgpl-2 with LGPL-2.1-only is wrong, but with LGPL-2.0-only is correct).
if replacement == nil || replaced.Has(id) {
if _, exists := licenseIDs[id]; !exists {
licenseIDs[id] = l.ID
}
} else {
// a useful debugging line during builds
log.Printf("replacing %s with %s\n", id, replacement.ID)
licenseIDs[id] = replacement.ID
replaced.Add(id)
}
}
}
return licenseIDs
}
// buildLicensePermutations returns one variant of license for every version spelling produced
// by versionPermutations (for example the one-, two- and three-component forms of the version).
// The result is nil when versionPermutations yields nothing.
func buildLicensePermutations(license string) (perms []string) {
lv := findLicenseVersion(license)
vp := versionPermutations(lv)
// version is the dotted form of the parsed components, used as the text to swap out below.
version := strings.Join(lv, ".")
for _, p := range vp {
// NOTE(review): this replaces the first occurrence of the version text anywhere in the ID,
// which is not necessarily the position the version was parsed from.
perms = append(perms, strings.Replace(license, version, p, 1))
}
return perms
}
// findLicenseVersion extracts the version components of a license ID using versionMatch.
// Only the first match in the ID is considered. It returns nil when nothing matches.
func findLicenseVersion(license string) (version []string) {
versionList := versionMatch.FindAllStringSubmatch(license, -1)
if len(versionList) == 0 {
return version
}
for i, v := range versionList[0] {
// index 0 is the whole match; empty strings are capture groups that did not participate.
if v != "" && i != 0 {
version = append(version, v)
}
}
return version
}
// versionPermutations returns the one-, two- and three-component spellings of version joined
// with ".", padding missing components with "0". Spellings made up solely of zeros and dots
// are left out. The input slice is not modified.
// NOTE(review): the result comes from a set, so its order presumably should not be relied upon.
func versionPermutations(version []string) []string {
// work on a copy so the padding below never writes into the caller's backing array
ver := append([]string(nil), version...)
perms := strset.New()
for i := 1; i <= 3; i++ {
if len(ver) < i+1 {
ver = append(ver, "0")
}
perm := strings.Join(ver[:i], ".")
// badCount equals len(perm) exactly when perm contains nothing but '0' and '.' characters
badCount := strings.Count(perm, "0") + strings.Count(perm, ".")
if badCount != len(perm) {
perms.Add(perm)
}
}
return perms.List()
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,104 @@
package main
import (
"strings"
"github.com/scylladb/go-set/strset"
)
// LicenseList mirrors the top level of the SPDX license list JSON document.
type LicenseList struct {
// Version is decoded from the "licenseListVersion" key.
Version string `json:"licenseListVersion"`
// Licenses holds one License per element of the "licenses" array.
Licenses []License `json:"licenses"`
}
// License is a single entry of the license list, decoded from the JSON keys named in the tags.
type License struct {
ID string `json:"licenseId"`
// Name, OSIApproved and SeeAlso are the fields canReplace compares when pairing a
// deprecated license with its replacement.
Name string `json:"name"`
Text string `json:"licenseText"`
// Deprecated is decoded from "isDeprecatedLicenseId"; a deprecated license can never replace another.
Deprecated bool `json:"isDeprecatedLicenseId"`
OSIApproved bool `json:"isOsiApproved"`
SeeAlso []string `json:"seeAlso"`
}
// canReplace reports whether l is a suitable substitute for other. That is the case when l is
// not deprecated, has a different ID than other, and agrees with other on name, OSI approval
// and the ordered list of "see also" references.
func (l License) canReplace(other License) bool {
	switch {
	case l.Deprecated,
		l.ID == other.ID,
		l.Name != other.Name,
		l.OSIApproved != other.OSIApproved,
		len(l.SeeAlso) != len(other.SeeAlso):
		return false
	}

	// lengths are equal at this point, so indexing other.SeeAlso is safe
	for idx := range l.SeeAlso {
		if l.SeeAlso[idx] != other.SeeAlso[idx] {
			return false
		}
	}
	return true
}
// findReplacementLicense returns a pointer to a copy of the first license in the list that
// canReplace the given deprecated license, or nil when the list holds no such license.
func (ll LicenseList) findReplacementLicense(deprecated License) *License {
	for idx := range ll.Licenses {
		// copy the element so the returned pointer does not alias the list's backing array
		if candidate := ll.Licenses[idx]; candidate.canReplace(deprecated) {
			return &candidate
		}
	}
	return nil
}
// buildLicensePermutations returns one variant of license for every version spelling produced
// by versionPermutations, e.g. "gpl-2.0+" yields "gpl-2+", "gpl-2.0+" and "gpl-2.0.0+".
// The order of the result follows versionPermutations. It returns nil when the ID carries no
// version or when no spelling is produced.
func buildLicensePermutations(license string) (perms []string) {
	lv := findLicenseVersion(license)
	if len(lv) == 0 {
		// no version in the ID, so there is nothing to permute
		return nil
	}

	// versionMatch only recognises a version that directly follows a '-', and findLicenseVersion
	// uses the first such match. Anchoring the replacement on that '-' rewrites the parsed
	// version itself rather than the same digits occurring earlier in the ID
	// (e.g. the "2" in "bzip2-2").
	version := "-" + strings.Join(lv, ".")
	for _, p := range versionPermutations(lv) {
		perms = append(perms, strings.Replace(license, version, "-"+p, 1))
	}
	return perms
}
// findLicenseVersion extracts the version components of a license ID using versionMatch.
// Only the first match in the ID is considered, and capture groups that did not participate
// are dropped. It returns nil when the ID contains no version.
func findLicenseVersion(license string) (version []string) {
	match := versionMatch.FindStringSubmatch(license)
	if match == nil {
		return nil
	}

	// match[0] is the whole match; the capture groups follow it
	for _, group := range match[1:] {
		if group == "" {
			continue
		}
		version = append(version, group)
	}
	return version
}
// versionPermutations returns the one-, two- and three-component spellings of version joined
// with ".", padding missing components with "0". Spellings that consist solely of zeros and
// dots are left out. The input slice is never modified.
func versionPermutations(version []string) []string {
	// work on a private copy so padding never writes into the caller's backing array
	parts := make([]string, len(version))
	copy(parts, version)

	spellings := strset.New()
	for width := 1; width <= 3; width++ {
		if len(parts) <= width {
			parts = append(parts, "0")
		}
		candidate := strings.Join(parts[:width], ".")
		// trimming every '0' and '.' leaves nothing exactly when the candidate is all zeros
		if strings.Trim(candidate, "0.") == "" {
			continue
		}
		spellings.Add(candidate)
	}
	return spellings.List()
}

View file

@ -0,0 +1,220 @@
package main
import (
"github.com/stretchr/testify/assert"
"strings"
"testing"
)
// Shared fixtures for the tests in this file.
var (
// license1 is deprecated and shares its name with license2.
license1 = License{
ID: "ABC-1.0+",
Name: "The ABC License 1.0",
Deprecated: true,
}
// license2 is not deprecated and shares its name with license1.
license2 = License{
ID: "ABC-1.0-Or-later",
Name: "The ABC License 1.0",
}
// license3 is deprecated and shares its name with license4.
license3 = License{
ID: "ABC-1.0",
Name: "The ABC License 1.0 Only",
Deprecated: true,
}
// license4 is not deprecated and shares its name with license3.
license4 = License{
ID: "ABC-1.0-Only",
Name: "The ABC License 1.0 Only",
}
// license5 and license6 share a name but are both deprecated.
license5 = License{
ID: "Duh-1.0",
Name: "The Duh License 1.0",
Deprecated: true,
}
license6 = License{
ID: "Duh-1.0-duh",
Name: "The Duh License 1.0",
Deprecated: true,
}
)
// TestLicense_canReplace checks canReplace against fixture pairs: a deprecated receiver never
// replaces anything, a non-deprecated license replaces a deprecated one with the same name,
// and licenses with different names never replace each other.
func TestLicense_canReplace(t *testing.T) {
	cases := []struct {
		l1, l2   License
		expected bool
	}{
		{l1: license1, l2: license2, expected: false},
		{l1: license2, l2: license1, expected: true},
		{l1: license2, l2: license3, expected: false},
		{l1: license3, l2: license2, expected: false},
	}

	for idx := range cases {
		tc := cases[idx]
		got := tc.l1.canReplace(tc.l2)
		assert.Equal(t, tc.expected, got)
	}
}
// TestLicensePermutations verifies that buildLicensePermutations expands a license ID into its
// one-, two- and three-component version spellings. Order is not significant.
func TestLicensePermutations(t *testing.T) {
	type testCase struct {
		shortName    string
		permutations []string
	}

	cases := []testCase{
		{
			shortName:    "GPL-1-only",
			permutations: []string{"GPL-1-only", "GPL-1.0-only", "GPL-1.0.0-only"},
		},
		{
			shortName:    "GPL-2",
			permutations: []string{"GPL-2", "GPL-2.0", "GPL-2.0.0"},
		},
		{
			shortName:    "GPL-2.0+",
			permutations: []string{"GPL-2+", "GPL-2.0+", "GPL-2.0.0+"},
		},
		{
			shortName:    "GPL-3.0.0-or-later",
			permutations: []string{"GPL-3-or-later", "GPL-3.0-or-later", "GPL-3.0.0-or-later"},
		},
		{
			shortName:    "abc-1.1",
			permutations: []string{"abc-1", "abc-1.1", "abc-1.1.0"},
		},
		{
			shortName:    "oldap-2.0",
			permutations: []string{"oldap-2", "oldap-2.0", "oldap-2.0.0"},
		},
	}

	for _, tc := range cases {
		tc := tc
		t.Run(tc.shortName, func(t *testing.T) {
			got := buildLicensePermutations(tc.shortName)
			assert.ElementsMatch(t, tc.permutations, got)
		})
	}
}
func TestVersionPermutations(t *testing.T) {
var tests = []struct {
version []string
permutations []string
}{
{
[]string{"1", "0"},
[]string{"1", "1.0", "1.0.0"},
},
{
[]string{"2"},
[]string{"2", "2.0", "2.0.0"},
},
{
[]string{"2", "0"},
[]string{"2", "2.0", "2.0.0"},
},
{
[]string{"3", "0", "0"},
[]string{"3", "3.0", "3.0.0"},
},
{
[]string{"0", "3"},
[]string{"0.3", "0.3.0"},
},
{
[]string{"0", "0", "3"},
[]string{"0.0.3"},
},
}
for _, test := range tests {
t.Run(strings.Join(test.version, "."), func(t *testing.T) {
got := versionPermutations(test.version)
assert.ElementsMatch(t, test.permutations, got)
})
}
}
func TestFindLicenseVersion(t *testing.T) {
var tests = []struct {
license string
version []string
}{
{
"GPL-1.0-only",
[]string{"1", "0"},
},
{
"GPL-2.0",
[]string{"2", "0"},
},
{
"GPL-2.0.0",
[]string{"2", "0", "0"},
},
{
"GPL-2",
[]string{"2"},
},
{
"bzip2-1",
[]string{"1"},
},
{
"php-3.01",
[]string{"3", "01"},
},
{
"oldap-2.0",
[]string{"2", "0"},
},
}
for _, test := range tests {
t.Run(test.license, func(t *testing.T) {
got := findLicenseVersion(test.license)
assert.Equal(t, test.version, got)
})
}
}
// Test_findReplacementLicense verifies that a deprecated license is paired with its
// non-deprecated namesake when the list contains one, and with nil otherwise.
func Test_findReplacementLicense(t *testing.T) {
	cases := []struct {
		l        License
		expected *License
		licenses LicenseList
	}{
		{l: license1, expected: nil, licenses: LicenseList{}},
		{l: license1, expected: nil, licenses: LicenseList{Licenses: []License{license3}}},
		{l: license1, expected: &license2, licenses: LicenseList{Licenses: []License{license2, license3}}},
		{l: license1, expected: &license2, licenses: LicenseList{Licenses: []License{license2, license3, license4, license5}}},
	}

	for idx := range cases {
		tc := cases[idx]
		got := tc.licenses.findReplacementLicense(tc.l)
		assert.Equal(t, tc.expected, got)
	}
}

File diff suppressed because it is too large Load diff

View file

@ -1,5 +1,5 @@
// Code generated by go generate; DO NOT EDIT.
// This file was generated by robots at 2022-05-09 10:01:37.625063 -0400 EDT m=+0.423632834
// This file was generated by robots at 2022-08-02 12:46:45.90647 -0400 EDT m=+0.327870639
// using data from https://spdx.org/licenses/licenses.json
package spdxlicense
@ -524,20 +524,20 @@ var licenseIDs = map[string]string{
"glulxe": "Glulxe",
"glwtpl": "GLWTPL",
"gnuplot": "gnuplot",
"gpl-1": "GPL-1.0",
"gpl-1+": "GPL-1.0+",
"gpl-1": "GPL-1.0-only",
"gpl-1+": "GPL-1.0-or-later",
"gpl-1-only": "GPL-1.0-only",
"gpl-1-or-later": "GPL-1.0-or-later",
"gpl-1.0": "GPL-1.0",
"gpl-1.0+": "GPL-1.0+",
"gpl-1.0": "GPL-1.0-only",
"gpl-1.0+": "GPL-1.0-or-later",
"gpl-1.0-only": "GPL-1.0-only",
"gpl-1.0-or-later": "GPL-1.0-or-later",
"gpl-1.0.0": "GPL-1.0",
"gpl-1.0.0+": "GPL-1.0+",
"gpl-1.0.0": "GPL-1.0-only",
"gpl-1.0.0+": "GPL-1.0-or-later",
"gpl-1.0.0-only": "GPL-1.0-only",
"gpl-1.0.0-or-later": "GPL-1.0-or-later",
"gpl-2": "GPL-2.0",
"gpl-2+": "GPL-2.0+",
"gpl-2": "GPL-2.0-only",
"gpl-2+": "GPL-2.0-or-later",
"gpl-2-only": "GPL-2.0-only",
"gpl-2-or-later": "GPL-2.0-or-later",
"gpl-2-with-autoconf-exception": "GPL-2.0-with-autoconf-exception",
@ -545,8 +545,8 @@ var licenseIDs = map[string]string{
"gpl-2-with-classpath-exception": "GPL-2.0-with-classpath-exception",
"gpl-2-with-font-exception": "GPL-2.0-with-font-exception",
"gpl-2-with-gcc-exception": "GPL-2.0-with-GCC-exception",
"gpl-2.0": "GPL-2.0",
"gpl-2.0+": "GPL-2.0+",
"gpl-2.0": "GPL-2.0-only",
"gpl-2.0+": "GPL-2.0-or-later",
"gpl-2.0-only": "GPL-2.0-only",
"gpl-2.0-or-later": "GPL-2.0-or-later",
"gpl-2.0-with-autoconf-exception": "GPL-2.0-with-autoconf-exception",
@ -554,8 +554,8 @@ var licenseIDs = map[string]string{
"gpl-2.0-with-classpath-exception": "GPL-2.0-with-classpath-exception",
"gpl-2.0-with-font-exception": "GPL-2.0-with-font-exception",
"gpl-2.0-with-gcc-exception": "GPL-2.0-with-GCC-exception",
"gpl-2.0.0": "GPL-2.0",
"gpl-2.0.0+": "GPL-2.0+",
"gpl-2.0.0": "GPL-2.0-only",
"gpl-2.0.0+": "GPL-2.0-or-later",
"gpl-2.0.0-only": "GPL-2.0-only",
"gpl-2.0.0-or-later": "GPL-2.0-or-later",
"gpl-2.0.0-with-autoconf-exception": "GPL-2.0-with-autoconf-exception",
@ -563,20 +563,20 @@ var licenseIDs = map[string]string{
"gpl-2.0.0-with-classpath-exception": "GPL-2.0-with-classpath-exception",
"gpl-2.0.0-with-font-exception": "GPL-2.0-with-font-exception",
"gpl-2.0.0-with-gcc-exception": "GPL-2.0-with-GCC-exception",
"gpl-3": "GPL-3.0",
"gpl-3+": "GPL-3.0+",
"gpl-3": "GPL-3.0-only",
"gpl-3+": "GPL-3.0-or-later",
"gpl-3-only": "GPL-3.0-only",
"gpl-3-or-later": "GPL-3.0-or-later",
"gpl-3-with-autoconf-exception": "GPL-3.0-with-autoconf-exception",
"gpl-3-with-gcc-exception": "GPL-3.0-with-GCC-exception",
"gpl-3.0": "GPL-3.0",
"gpl-3.0+": "GPL-3.0+",
"gpl-3.0": "GPL-3.0-only",
"gpl-3.0+": "GPL-3.0-or-later",
"gpl-3.0-only": "GPL-3.0-only",
"gpl-3.0-or-later": "GPL-3.0-or-later",
"gpl-3.0-with-autoconf-exception": "GPL-3.0-with-autoconf-exception",
"gpl-3.0-with-gcc-exception": "GPL-3.0-with-GCC-exception",
"gpl-3.0.0": "GPL-3.0",
"gpl-3.0.0+": "GPL-3.0+",
"gpl-3.0.0": "GPL-3.0-only",
"gpl-3.0.0+": "GPL-3.0-or-later",
"gpl-3.0.0-only": "GPL-3.0-only",
"gpl-3.0.0-or-later": "GPL-3.0-or-later",
"gpl-3.0.0-with-autoconf-exception": "GPL-3.0-with-autoconf-exception",
@ -622,36 +622,36 @@ var licenseIDs = map[string]string{
"lal-1.3.0": "LAL-1.3",
"latex2e": "Latex2e",
"leptonica": "Leptonica",
"lgpl-2": "LGPL-2.0",
"lgpl-2+": "LGPL-2.0+",
"lgpl-2": "LGPL-2.1-only",
"lgpl-2+": "LGPL-2.0-or-later",
"lgpl-2-only": "LGPL-2.0-only",
"lgpl-2-or-later": "LGPL-2.0-or-later",
"lgpl-2.0": "LGPL-2.0",
"lgpl-2.0+": "LGPL-2.0+",
"lgpl-2.0": "LGPL-2.0-only",
"lgpl-2.0+": "LGPL-2.0-or-later",
"lgpl-2.0-only": "LGPL-2.0-only",
"lgpl-2.0-or-later": "LGPL-2.0-or-later",
"lgpl-2.0.0": "LGPL-2.0",
"lgpl-2.0.0+": "LGPL-2.0+",
"lgpl-2.0.0": "LGPL-2.0-only",
"lgpl-2.0.0+": "LGPL-2.0-or-later",
"lgpl-2.0.0-only": "LGPL-2.0-only",
"lgpl-2.0.0-or-later": "LGPL-2.0-or-later",
"lgpl-2.1": "LGPL-2.1",
"lgpl-2.1": "LGPL-2.1-only",
"lgpl-2.1+": "LGPL-2.1+",
"lgpl-2.1-only": "LGPL-2.1-only",
"lgpl-2.1-or-later": "LGPL-2.1-or-later",
"lgpl-2.1.0": "LGPL-2.1",
"lgpl-2.1.0": "LGPL-2.1-only",
"lgpl-2.1.0+": "LGPL-2.1+",
"lgpl-2.1.0-only": "LGPL-2.1-only",
"lgpl-2.1.0-or-later": "LGPL-2.1-or-later",
"lgpl-3": "LGPL-3.0",
"lgpl-3+": "LGPL-3.0+",
"lgpl-3": "LGPL-3.0-only",
"lgpl-3+": "LGPL-3.0-or-later",
"lgpl-3-only": "LGPL-3.0-only",
"lgpl-3-or-later": "LGPL-3.0-or-later",
"lgpl-3.0": "LGPL-3.0",
"lgpl-3.0+": "LGPL-3.0+",
"lgpl-3.0": "LGPL-3.0-only",
"lgpl-3.0+": "LGPL-3.0-or-later",
"lgpl-3.0-only": "LGPL-3.0-only",
"lgpl-3.0-or-later": "LGPL-3.0-or-later",
"lgpl-3.0.0": "LGPL-3.0",
"lgpl-3.0.0+": "LGPL-3.0+",
"lgpl-3.0.0": "LGPL-3.0-only",
"lgpl-3.0.0+": "LGPL-3.0-or-later",
"lgpl-3.0.0-only": "LGPL-3.0-only",
"lgpl-3.0.0-or-later": "LGPL-3.0-or-later",
"lgpllr": "LGPLLR",

View file

@ -10,6 +10,7 @@ func TestLicenceListIDs(t *testing.T) {
// do a sanity check on the generated data
assert.Equal(t, "0BSD", licenseIDs["0bsd"])
assert.Equal(t, "ZPL-2.1", licenseIDs["zpl-2.1"])
assert.Equal(t, "GPL-2.0", licenseIDs["gpl-2"])
assert.Equal(t, "GPL-2.0-only", licenseIDs["gpl-2"])
assert.Equal(t, "GPL-2.0-or-later", licenseIDs["gpl-2+"])
assert.NotEmpty(t, Version)
}

View file

@ -17,11 +17,11 @@ func TestIDParse(t *testing.T) {
},
{
"GPL-2",
"GPL-2.0",
"GPL-2.0-only",
},
{
"GPL-2+",
"GPL-2.0+",
"GPL-2.0-or-later",
},
{
"GPL-3.0.0-or-later",