feat: Perform case insensitive matching on Java license files (#2235)

Signed-off-by: Colm O hEigeartaigh <coheigea@apache.org>
This commit is contained in:
Colm O hEigeartaigh 2023-10-25 14:51:59 +01:00 committed by GitHub
parent 7392d607b6
commit 1daf18fee9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 22 additions and 13 deletions

View file

@ -40,15 +40,18 @@ func (z ZipFileManifest) Add(entry string, info os.FileInfo) {
}
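// GlobMatch returns the path keys that match the given value(s). When caseInsensitive is true, both the
// patterns and the entry names are lowercased before matching; the returned keys keep their original case.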
func (z ZipFileManifest) GlobMatch(patterns ...string) []string {
func (z ZipFileManifest) GlobMatch(caseInsensitive bool, patterns ...string) []string {
uniqueMatches := strset.New()
for _, pattern := range patterns {
for entry := range z {
// We want to match globs as if entries begin with a leading slash (akin to an absolute path)
// so that glob logic is consistent inside and outside of ZIP archives
normalizedEntry := normalizeZipEntryName(entry)
normalizedEntry := normalizeZipEntryName(caseInsensitive, entry)
if caseInsensitive {
pattern = strings.ToLower(pattern)
}
if GlobMatch(pattern, normalizedEntry) {
uniqueMatches.Add(entry)
}
@ -62,7 +65,10 @@ func (z ZipFileManifest) GlobMatch(patterns ...string) []string {
}
// normalizeZipEntryName takes the given path entry, lowercases it when caseInsensitive is true, and ensures it is prefixed with "/".
func normalizeZipEntryName(entry string) string {
func normalizeZipEntryName(caseInsensitive bool, entry string) string {
if caseInsensitive {
entry = strings.ToLower(entry)
}
if !strings.HasPrefix(entry, "/") {
return "/" + entry
}

View file

@ -116,6 +116,10 @@ func TestZipFileManifest_GlobMatch(t *testing.T) {
"*/a-file.txt",
"some-dir/a-file.txt",
},
{
"*/A-file.txt",
"some-dir/a-file.txt",
},
{
"**/*.zip",
"nested.zip",
@ -126,7 +130,7 @@ func TestZipFileManifest_GlobMatch(t *testing.T) {
t.Run(tc.glob, func(t *testing.T) {
glob := tc.glob
results := z.GlobMatch(glob)
results := z.GlobMatch(true, glob)
if len(results) == 1 && results[0] == tc.expected {
return

View file

@ -20,7 +20,6 @@ func FileNames() []string {
"LICENSE",
"LICENSE.md",
"LICENSE.markdown",
"license.txt",
"LICENSE.txt",
"LICENSE-2.0.txt",
"LICENCE-2.0.txt",

View file

@ -150,7 +150,7 @@ func (j *archiveParser) parse() ([]pkg.Package, []artifact.Relationship, error)
// discoverMainPackage parses the root Java manifest used as the parent package to all discovered nested packages.
func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) {
// search and parse java manifest files
manifestMatches := j.fileManifest.GlobMatch(manifestGlob)
manifestMatches := j.fileManifest.GlobMatch(false, manifestGlob)
if len(manifestMatches) > 1 {
return nil, fmt.Errorf("found multiple manifests in the jar: %+v", manifestMatches)
} else if len(manifestMatches) == 0 {
@ -246,8 +246,8 @@ type parsedPomProject struct {
}
func (j *archiveParser) guessMainPackageNameAndVersionFromPomInfo() (name, version string, licenses []pkg.License) {
pomPropertyMatches := j.fileManifest.GlobMatch(pomPropertiesGlob)
pomMatches := j.fileManifest.GlobMatch(pomXMLGlob)
pomPropertyMatches := j.fileManifest.GlobMatch(false, pomPropertiesGlob)
pomMatches := j.fileManifest.GlobMatch(false, pomXMLGlob)
var pomPropertiesObject pkg.PomProperties
var pomProjectObject parsedPomProject
if len(pomPropertyMatches) == 1 || len(pomMatches) == 1 {
@ -295,13 +295,13 @@ func (j *archiveParser) discoverPkgsFromAllMavenFiles(parentPkg *pkg.Package) ([
var pkgs []pkg.Package
// pom.properties
properties, err := pomPropertiesByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(pomPropertiesGlob))
properties, err := pomPropertiesByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomPropertiesGlob))
if err != nil {
return nil, err
}
// pom.xml
projects, err := pomProjectByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(pomXMLGlob))
projects, err := pomProjectByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomXMLGlob))
if err != nil {
return nil, err
}
@ -340,10 +340,10 @@ func getDigestsFromArchive(archivePath string) ([]file.Digest, error) {
func (j *archiveParser) getLicenseFromFileInArchive() ([]pkg.License, error) {
var fileLicenses []pkg.License
for _, filename := range licenses.FileNames() {
licenseMatches := j.fileManifest.GlobMatch("/META-INF/" + filename)
licenseMatches := j.fileManifest.GlobMatch(true, "/META-INF/"+filename)
if len(licenseMatches) == 0 {
// Try the root directory if it's not in META-INF
licenseMatches = j.fileManifest.GlobMatch("/" + filename)
licenseMatches = j.fileManifest.GlobMatch(true, "/"+filename)
}
if len(licenseMatches) > 0 {
@ -378,7 +378,7 @@ func (j *archiveParser) discoverPkgsFromNestedArchives(parentPkg *pkg.Package) (
// associating each discovered package to the given parent package.
func discoverPkgsFromZip(location file.Location, archivePath, contentPath string, fileManifest intFile.ZipFileManifest, parentPkg *pkg.Package) ([]pkg.Package, []artifact.Relationship, error) {
// find the entries matching archiveFormatGlobs and extract them from the zip
openers, err := intFile.ExtractFromZipToUniqueTempFile(archivePath, contentPath, fileManifest.GlobMatch(archiveFormatGlobs...)...)
openers, err := intFile.ExtractFromZipToUniqueTempFile(archivePath, contentPath, fileManifest.GlobMatch(false, archiveFormatGlobs...)...)
if err != nil {
return nil, nil, fmt.Errorf("unable to extract files from zip: %w", err)
}