mirror of
https://github.com/anchore/syft
synced 2024-11-13 23:57:07 +00:00
f473bb75a8
Update license_list.go to accept more permissible inputs for greater SPDX ID matching. Example: GPL3, gpl3, gpl-3, and GPL-3 can all map to GPL-3.0-only. By lowercasing all strings and removing the "-", we're able to return a valid SPDX license ID for a greater diversity of input strings. --------- Signed-off-by: Christopher Phillips <christopher.phillips@anchore.com>
158 lines
5.1 KiB
Go
158 lines
5.1 KiB
Go
package main
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"log"
|
|
"net/http"
|
|
"os"
|
|
"regexp"
|
|
"sort"
|
|
"strings"
|
|
"text/template"
|
|
"time"
|
|
)
|
|
|
|
// This program generates license_list.go.
|
|
const (
	// url is the SPDX endpoint the license list JSON is downloaded from.
	url = "https://spdx.org/licenses/licenses.json"
	// source is the name of the Go file this generator writes.
	source = "license_list.go"
)
|
|
|
|
// licenseListTemplate is the text of the generated Go file. It expects a data
// value exposing Timestamp, URL, Version and LicenseIDs; the "{{-" trim markers
// strip the whitespace in front of the range/end actions so every map entry is
// rendered on its own line.
const licenseListTemplate = `// Code generated by go generate; DO NOT EDIT.
// This file was generated by robots at {{ .Timestamp }}
// using data from {{ .URL }}
package spdxlicense

const Version = {{ printf "%q" .Version }}

var licenseIDs = map[string]string{
{{- range $k, $v := .LicenseIDs }}
	{{ printf "%q" $k }}: {{ printf "%q" $v }},
{{- end }}
}
`

// tmp is the parsed form of licenseListTemplate. Parsing happens once at
// package initialization and panics (via template.Must) if the text is invalid.
var tmp = template.Must(template.New("").Parse(licenseListTemplate))
|
|
|
|
// versionMatch matches one to three groups of digits, each optionally followed
// by a literal dot (for example "3", "3.0" or "1.0.2"); the digit groups are
// captured as submatches 1 through 3.
// NOTE(review): nothing in the visible part of this file references it;
// presumably the license ID permutation code uses it — confirm against its caller.
var versionMatch = regexp.MustCompile(`([0-9]+)\.?([0-9]+)?\.?([0-9]+)?\.?`)
|
|
|
|
func main() {
|
|
if err := run(); err != nil {
|
|
fmt.Println(err.Error())
|
|
os.Exit(1)
|
|
}
|
|
}
|
|
|
|
func run() error {
|
|
resp, err := http.Get(url)
|
|
if err != nil {
|
|
return fmt.Errorf("unable to get licenses list: %w", err)
|
|
}
|
|
var result LicenseList
|
|
if err = json.NewDecoder(resp.Body).Decode(&result); err != nil {
|
|
return fmt.Errorf("unable to decode license list: %w", err)
|
|
}
|
|
defer func() {
|
|
if err := resp.Body.Close(); err != nil {
|
|
log.Fatalf("unable to close body: %+v", err)
|
|
}
|
|
}()
|
|
|
|
f, err := os.Create(source)
|
|
if err != nil {
|
|
return fmt.Errorf("unable to create %q: %w", source, err)
|
|
}
|
|
defer func() {
|
|
if err := f.Close(); err != nil {
|
|
log.Fatalf("unable to close %q: %+v", source, err)
|
|
}
|
|
}()
|
|
|
|
licenseIDs := processSPDXLicense(result)
|
|
|
|
err = tmp.Execute(f, struct {
|
|
Timestamp time.Time
|
|
URL string
|
|
Version string
|
|
LicenseIDs map[string]string
|
|
}{
|
|
Timestamp: time.Now(),
|
|
URL: url,
|
|
Version: result.Version,
|
|
LicenseIDs: licenseIDs,
|
|
})
|
|
|
|
if err != nil {
|
|
return fmt.Errorf("unable to generate template: %w", err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Parsing the provided SPDX license list necessitates a three pass approach.
|
|
// The first pass is only related to what SPDX considers the truth. We use license info to
|
|
// find replacements for deprecated licenses.
|
|
// The second pass attempts to generate known short/long version listings for each key.
|
|
// For info on some short name conventions see this document:
|
|
// https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/#license-short-name.
|
|
// The short long listing generation attempts to build all license permutations for a given key.
|
|
// The new keys are then also associated with their relative SPDX value. If a key has already been entered
|
|
// we know to ignore it since it came from the first pass which is considered the SPDX source of truth.
|
|
// We also sort the licenses for the second pass so that cases like `GPL-1` associate to `GPL-1.0` and not `GPL-1.1`.
|
|
// The third pass is for overwriting deprecated licenses with replacements, for example GPL-2.0+ is deprecated
|
|
// and now maps to GPL-2.0-or-later.
|
|
func processSPDXLicense(result LicenseList) map[string]string {
|
|
// The order of variations/permutations of a license ID matter.
|
|
// The permutation code can generate the same value for two difference licenses,
|
|
// for example: The licenses `ABC-1.0` and `ABC-1.1` can both map to `ABC1`,
|
|
// we need to guarantee the order they are created to avoid mapping them incorrectly.
|
|
// To do this we use a sorted list.
|
|
sort.Slice(result.Licenses, func(i, j int) bool {
|
|
return result.Licenses[i].ID < result.Licenses[j].ID
|
|
})
|
|
|
|
// keys are simplified by removing dashes and lowercasing ID
|
|
// this is so license declarations in the wild like: LGPL3 LGPL-3 lgpl3 and lgpl-3 can all match
|
|
licenseIDs := make(map[string]string)
|
|
for _, l := range result.Licenses {
|
|
// licensePerms includes the cleanID in return slice
|
|
cleanID := cleanLicenseID(l.ID)
|
|
licensePerms := buildLicenseIDPermutations(cleanID)
|
|
|
|
// if license is deprecated, find its replacement and add to licenseIDs
|
|
if l.Deprecated {
|
|
idToMap := l.ID
|
|
replacement := result.findReplacementLicense(l)
|
|
if replacement != nil {
|
|
idToMap = replacement.ID
|
|
}
|
|
// it's important to use the original licensePerms here so that the deprecated license
|
|
// can now point to the new correct license
|
|
for _, id := range licensePerms {
|
|
if _, exists := licenseIDs[id]; exists {
|
|
// can be used to debug duplicate license permutations and confirm that examples like GPL1
|
|
// do not point to GPL-1.1
|
|
// log.Println("duplicate license list permutation found when mapping deprecated license to replacement")
|
|
// log.Printf("already have key: %q for SPDX ID: %q; attempted to map replacement ID: %q for deprecated ID: %q\n", id, value, replacement.ID, l.ID)
|
|
continue
|
|
}
|
|
licenseIDs[id] = idToMap
|
|
}
|
|
}
|
|
|
|
// if license is not deprecated, add all permutations to licenseIDs
|
|
for _, id := range licensePerms {
|
|
if _, exists := licenseIDs[id]; exists {
|
|
// log.Println("found duplicate license permutation key for non deprecated license")
|
|
// log.Printf("already have key: %q for SPDX ID: %q; tried to insert as SPDX ID:%q\n", id, value, l.ID)
|
|
continue
|
|
}
|
|
licenseIDs[id] = l.ID
|
|
}
|
|
}
|
|
|
|
return licenseIDs
|
|
}
|
|
|
|
// cleanLicenseID returns the simplified lookup form of a license ID: the ID is
// lowercased and then every "-" is removed (e.g. "GPL-3.0-only" becomes
// "gpl3.0only").
func cleanLicenseID(id string) string {
	return strings.ReplaceAll(strings.ToLower(id), "-", "")
}
|