diff --git a/pkg/analyzer/analyzers/huggingface/huggingface.go b/pkg/analyzer/analyzers/huggingface/huggingface.go index 7c7e6e25f..2285d3fb2 100644 --- a/pkg/analyzer/analyzers/huggingface/huggingface.go +++ b/pkg/analyzer/analyzers/huggingface/huggingface.go @@ -30,11 +30,234 @@ type Analyzer struct { func (Analyzer) Type() analyzerpb.AnalyzerType { return analyzerpb.AnalyzerType_HuggingFace } func (a Analyzer) Analyze(_ context.Context, credInfo map[string]string) (*analyzers.AnalyzerResult, error) { - _, err := AnalyzePermissions(a.Cfg, credInfo["key"]) + key, ok := credInfo["key"] + if !ok || key == "" { + return nil, fmt.Errorf("key not found in credentialInfo") + } + + info, err := AnalyzePermissions(a.Cfg, key) if err != nil { return nil, err } - return nil, fmt.Errorf("not implemented") + return secretInfoToAnalyzerResult(info), nil +} + +func bakeUnboundedResources(tokenJSON HFTokenJSON) []analyzers.Resource { + unboundedResources := make([]analyzers.Resource, len(tokenJSON.Orgs)) + for idx, org := range tokenJSON.Orgs { + unboundedResources[idx] = analyzers.Resource{ + Name: org.Name, + Type: "organization", + Metadata: map[string]interface{}{ + "role": org.Role, + "is_enterprise": org.IsEnterprise, + }, + } + } + return unboundedResources +} + +func bakeUnfineGrainedBindings(allModels []Model, tokenJSON HFTokenJSON) []analyzers.Binding { + bindings := make([]analyzers.Binding, len(allModels)) + + for idx, model := range allModels { + // Add Read Privs to All Models + modelResource := analyzers.Resource{ + Name: model.Name, + Type: "model", + Metadata: map[string]interface{}{ + "private": model.Private, + }, + } + + // means both read & write permission for the model + accessLevel := string(analyzers.READ) + if tokenJSON.Auth.AccessToken.Type == WRITE { + accessLevel = string(analyzers.WRITE) + } + bindings[idx] = analyzers.Binding{ + Resource: modelResource, + Permission: analyzers.Permission{ + Value: string(accessLevel), + }, + } + } + return bindings +} + +// finegrained scopes are grouped by org, user or model. +func bakefineGrainedBindings(allModels []Model, tokenJSON HFTokenJSON) []analyzers.Binding { + // this section will extract the relevant permissions for each entity and store them in a map + var nameToPermissions = make(map[string]analyzers.Permission) + for _, permission := range tokenJSON.Auth.AccessToken.FineGrained.Scoped { + privs := analyzers.Permission{ + Value: string(analyzers.NONE), + } + for _, perm := range permission.Permissions { + if perm == "repo.content.read" { + privs.Value = string(analyzers.READ) + } else if perm == "repo.write" { + privs.Value = string(analyzers.WRITE) + } + } + if permission.Entity.Type == "user" || permission.Entity.Type == "org" { + nameToPermissions[permission.Entity.Name] = privs + } else if permission.Entity.Type == "model" { + nameToPermissions[modelNameLookup(allModels, permission.Entity.ID)] = privs + } + } + + bindings := make([]analyzers.Binding, len(allModels)) + for idx, model := range allModels { + + // Add Read Privs to All Models + modelResource := analyzers.Resource{ + Name: model.Name, + Type: "model", + Metadata: map[string]interface{}{ + "private": model.Private, + }, + } + + var perm analyzers.Permission + // get username/orgname for each model and apply those permissions + modelUsername := strings.Split(model.Name, "/")[0] + if permissions, ok := nameToPermissions[modelUsername]; ok { + perm = permissions + } + // override model permissions with repo-specific permissions + if permissions, ok := nameToPermissions[model.Name]; ok { + perm = permissions + } + + bindings[idx] = analyzers.Binding{ + Resource: modelResource, + Permission: perm, + } + } + return bindings +} + +func bakeOrganizationBindings(tokenJSON HFTokenJSON) []analyzers.Binding { + // check if there are any org permissions + // if so, save them as a map. Only need to do this once + // even if multiple orgs b/c as of 6/6/24, users can only define one set of scopes + // for all orgs referenced on an access token + orgPermissions := map[string]struct{}{} + var orgResource *analyzers.Resource = nil + for _, permission := range tokenJSON.Auth.AccessToken.FineGrained.Scoped { + if permission.Entity.Type == "org" { + orgResource = &analyzers.Resource{ + Name: permission.Entity.Name, + Type: "organization", + } + for _, perm := range permission.Permissions { + orgPermissions[perm] = struct{}{} + } + break + } + } + + bindings := make([]analyzers.Binding, 0) + // check if there are any org permissions + if orgResource == nil { + return bindings + } + + for _, permission := range org_scopes_order { + for key, value := range org_scopes[permission] { + if _, ok := orgPermissions[key]; ok { + bindings = append(bindings, analyzers.Binding{ + Resource: *orgResource, + Permission: analyzers.Permission{ + Value: value, + }, + }) + } + } + } + + return bindings +} + +func bakeUserBindings(tokenJSON HFTokenJSON) []analyzers.Binding { + bindings := make([]analyzers.Binding, 0) + // build a map of all user permissions + users := map[string]struct{}{} + userPermissions := map[string]struct{}{} + for _, permission := range tokenJSON.Auth.AccessToken.FineGrained.Scoped { + if permission.Entity.Type == "user" { + users[permission.Entity.Name] = struct{}{} + for _, perm := range permission.Permissions { + userPermissions[perm] = struct{}{} + } + } + } + + // global permissions only apply to user tokens as of 6/6/24 + // but there would be a naming collision in the scopes document + // so we prepend "global." to the key and then add to the map + for _, permission := range tokenJSON.Auth.AccessToken.FineGrained.Global { + userPermissions["global."+permission] = struct{}{} + } + + // check if there are any user permissions + if len(userPermissions) == 0 { + return bindings + } + + userResource := analyzers.Resource{ + Name: tokenJSON.Username, + Type: "user", + } + for _, permission := range user_scopes_order { + for key, value := range user_scopes[permission] { + if _, ok := userPermissions[key]; ok { + bindings = append(bindings, analyzers.Binding{ + Resource: userResource, + Permission: analyzers.Permission{ + Value: value, + }, + }) + } + } + } + + return bindings +} + +func secretInfoToAnalyzerResult(info *SecretInfo) *analyzers.AnalyzerResult { + if info == nil { + return nil + } + + result := analyzers.AnalyzerResult{ + AnalyzerType: analyzerpb.AnalyzerType_HuggingFace, + Metadata: map[string]interface{}{ + "username": info.Token.Username, + "name": info.Token.Name, + "token_name": info.Token.Auth.AccessToken.Name, + "token_type": info.Token.Auth.AccessToken.Type, + }, + } + + if len(info.Token.Orgs) > 0 { + result.UnboundedResources = bakeUnboundedResources(info.Token) + } + + result.Bindings = make([]analyzers.Binding, 0) + if info.Token.Auth.AccessToken.Type != FINEGRAINED { + result.Bindings = append(result.Bindings, bakeUnfineGrainedBindings(info.Models, info.Token)...) + } + + result.Bindings = append(result.Bindings, bakefineGrainedBindings(info.Models, info.Token)...) + + if info.Token.Auth.AccessToken.Type == FINEGRAINED { + result.Bindings = append(result.Bindings, bakeOrganizationBindings(info.Token)...) + result.Bindings = append(result.Bindings, bakeUserBindings(info.Token)...) + } + + return &result } // HFTokenJSON is the struct for the HF /whoami-v2 API JSON response diff --git a/pkg/detectors/huggingface/huggingface.go b/pkg/detectors/huggingface/huggingface.go index f836def20..1e5bca446 100644 --- a/pkg/detectors/huggingface/huggingface.go +++ b/pkg/detectors/huggingface/huggingface.go @@ -4,10 +4,11 @@ import ( "context" "encoding/json" "fmt" - regexp "github.com/wasilibs/go-re2" "net/http" "strings" + regexp "github.com/wasilibs/go-re2" + "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" @@ -54,6 +55,7 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result s1.Verified = isVerified s1.ExtraData = extraData s1.SetVerificationError(verificationErr, resMatch) + s1.AnalysisInfo = map[string]string{"key": resMatch} } results = append(results, s1) diff --git a/pkg/detectors/huggingface/huggingface_test.go b/pkg/detectors/huggingface/huggingface_test.go index eba2decd1..f954368d8 100644 --- a/pkg/detectors/huggingface/huggingface_test.go +++ b/pkg/detectors/huggingface/huggingface_test.go @@ -191,6 +191,7 @@ func TestHuggingface_FromChunk(t *testing.T) { if (got[i].VerificationError() != nil) != tt.wantVerificationErr { t.Fatalf(" wantVerificationError = %v, verification error = %v", tt.wantVerificationErr, got[i].VerificationError()) } + got[i].AnalysisInfo = nil } ignoreOpts := cmpopts.IgnoreFields(detectors.Result{}, "Raw", "verificationError") if diff := cmp.Diff(got, tt.want, ignoreOpts); diff != "" {