mirror of
https://github.com/anchore/syft
synced 2024-09-20 06:01:53 +00:00
add tests around MatchNamedCaptureGroups + rename
Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
This commit is contained in:
parent
66ebe49a04
commit
5743e32e02
7 changed files with 113 additions and 19 deletions
|
@ -1,15 +0,0 @@
|
|||
package internal
|
||||
|
||||
import "regexp"
|
||||
|
||||
// MatchCaptureGroups runs regEx against str and returns the capture group values of the first (leftmost) match,
// keyed by capture group name.
//
// The whole-expression match (index 0) is never included. Groups without a name are reported by the regexp package
// with an empty name, so their values are stored under the "" key (later unnamed groups overwrite earlier ones).
// When regEx does not match str an empty, non-nil map is returned.
func MatchCaptureGroups(regEx *regexp.Regexp, str string) map[string]string {
	match := regEx.FindStringSubmatch(str)
	results := make(map[string]string)
	for i, name := range regEx.SubexpNames() {
		// skip index 0 (the full match); the upper bound must be strict since match[len(match)] is out of range
		if i > 0 && i < len(match) {
			results[name] = match[i]
		}
	}
	return results
}
|
39
internal/regex_helpers.go
Normal file
39
internal/regex_helpers.go
Normal file
|
@ -0,0 +1,39 @@
|
|||
package internal
|
||||
|
||||
import "regexp"
|
||||
|
||||
// MatchNamedCaptureGroups takes a regular expression and string and returns the named capture group results of a
// single match in a map keyed by group name. Unnamed capture groups are never included.
//
// All matches of regEx in content are considered in order and the first one in which at least one named group
// captured a non-empty value is returned. If no match captured anything, the named group values of the last match
// are returned (all empty strings). Nil is returned when regEx does not match at all or has no named groups.
func MatchNamedCaptureGroups(regEx *regexp.Regexp, content string) map[string]string {
	// note: we are looking across all matches and stopping on the first non-empty match. Why? Take the following example:
	// input: "cool something to match against" pattern: `((?P<name>match) (?P<version>against))?`. Since the pattern is
	// encapsulated in an optional capture group, there will be results for each character, but the results will match
	// on nothing. The only "true" match will be at the end ("match against").
	allMatches := regEx.FindAllStringSubmatch(content, -1)

	// the group names are a property of the pattern, not of an individual match, so look them up only once
	names := regEx.SubexpNames()

	for matchIdx, match := range allMatches {
		// fill a candidate results map with named capture group results, accepting empty values, but not groups with
		// no names
		results := make(map[string]string)
		for nameIdx, name := range names {
			// the bound must be strict: match[len(match)] would be out of range
			if name == "" || nameIdx >= len(match) {
				continue
			}
			results[name] = match[nameIdx]
		}

		if len(results) == 0 {
			// nothing was named in this pattern, there is nothing to report for this match
			continue
		}

		// note: since we are looking for the first best potential match we should stop when we find the first one
		// with non-empty results.
		foundNonEmptyValue := false
		for _, value := range results {
			if value != "" {
				foundNonEmptyValue = true
				break
			}
		}

		// return the first non-empty result, or if this is the last match, the results that were found.
		if foundNonEmptyValue || matchIdx == len(allMatches)-1 {
			return results
		}
	}
	return nil
}
|
70
internal/regex_helpers_test.go
Normal file
70
internal/regex_helpers_test.go
Normal file
|
@ -0,0 +1,70 @@
|
|||
package internal
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestMatchCaptureGroups(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
pattern string
|
||||
expected map[string]string
|
||||
}{
|
||||
{
|
||||
name: "go-case",
|
||||
input: "match this thing",
|
||||
pattern: `(?P<name>match).*(?P<version>thing)`,
|
||||
expected: map[string]string{
|
||||
"name": "match",
|
||||
"version": "thing",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "only matches the first instance",
|
||||
input: "match this thing batch another think",
|
||||
pattern: `(?P<name>[mb]atch).*?(?P<version>thin[gk])`,
|
||||
expected: map[string]string{
|
||||
"name": "match",
|
||||
"version": "thing",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "nested capture groups",
|
||||
input: "cool something to match against",
|
||||
pattern: `((?P<name>match) (?P<version>against))`,
|
||||
expected: map[string]string{
|
||||
"name": "match",
|
||||
"version": "against",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "nested optional capture groups",
|
||||
input: "cool something to match against",
|
||||
pattern: `((?P<name>match) (?P<version>against))?`,
|
||||
expected: map[string]string{
|
||||
"name": "match",
|
||||
"version": "against",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "nested optional capture groups with larger match",
|
||||
input: "cool something to match against match never",
|
||||
pattern: `.*?((?P<name>match) (?P<version>(against|never)))?`,
|
||||
expected: map[string]string{
|
||||
"name": "match",
|
||||
"version": "against",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
actual := MatchNamedCaptureGroups(regexp.MustCompile(test.pattern), test.input)
|
||||
assert.Equal(t, test.expected, actual)
|
||||
})
|
||||
}
|
||||
}
|
|
@ -21,7 +21,7 @@ func parseLicensesFromCopyright(reader io.Reader) []string {
|
|||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
|
||||
matchesByGroup := internal.MatchCaptureGroups(licensePattern, line)
|
||||
matchesByGroup := internal.MatchNamedCaptureGroups(licensePattern, line)
|
||||
if len(matchesByGroup) > 0 {
|
||||
candidate, ok := matchesByGroup["license"]
|
||||
if !ok {
|
||||
|
|
|
@ -145,7 +145,7 @@ func extractAllFields(reader *bufio.Reader) (map[string]interface{}, error) {
|
|||
// of the "<name>" form, then return name and nil
|
||||
func extractSourceVersion(source string) (string, string) {
|
||||
// special handling for the Source field since it has formatted data
|
||||
match := internal.MatchCaptureGroups(sourceRegexp, source)
|
||||
match := internal.MatchNamedCaptureGroups(sourceRegexp, source)
|
||||
return match["name"], match["version"]
|
||||
}
|
||||
|
||||
|
|
|
@ -63,7 +63,7 @@ func (a *Author) UnmarshalJSON(b []byte) error {
|
|||
}
|
||||
} else {
|
||||
// parse out "name <email> (url)" into an Author struct
|
||||
fields = internal.MatchCaptureGroups(authorPattern, authorStr)
|
||||
fields = internal.MatchNamedCaptureGroups(authorPattern, authorStr)
|
||||
}
|
||||
|
||||
// translate the map into a structure
|
||||
|
|
|
@ -77,7 +77,7 @@ func parseGemSpecEntries(_ string, reader io.Reader) ([]pkg.Package, error) {
|
|||
}
|
||||
|
||||
for field, pattern := range patterns {
|
||||
matchMap := internal.MatchCaptureGroups(pattern, sanitizedLine)
|
||||
matchMap := internal.MatchNamedCaptureGroups(pattern, sanitizedLine)
|
||||
if value := matchMap[field]; value != "" {
|
||||
if postProcessor := postProcessors[field]; postProcessor != nil {
|
||||
fields[field] = postProcessor(value)
|
||||
|
|
Loading…
Reference in a new issue