From f52d8e872de276591fbfd48988d75ba212c7ca99 Mon Sep 17 00:00:00 2001 From: Hon <8292703+hxnyk@users.noreply.github.com> Date: Tue, 3 Sep 2024 12:54:39 -0700 Subject: [PATCH] Add huggingface tui config (#3060) * add huggingface tui config * update readme * undo auto format --- README.md | 26 ++++++- pkg/tui/pages/source_select/source_select.go | 1 + pkg/tui/sources/huggingface/huggingface.go | 77 ++++++++++++++++++++ pkg/tui/sources/sources.go | 5 ++ 4 files changed, 107 insertions(+), 2 deletions(-) create mode 100644 pkg/tui/sources/huggingface/huggingface.go diff --git a/README.md b/README.md index f65fafd7e..31850e81b 100644 --- a/README.md +++ b/README.md @@ -309,11 +309,33 @@ The following command will enumerate deleted and hidden commits on a GitHub repo trufflehog github-experimental --repo https://github.com/<USER>/<REPO>.git --object-discovery ``` -In addition to the normal TruffleHog output, the `--object-discovery` flag creates two files in a new `$HOME/.trufflehog` directory: `valid_hidden.txt` and `invalid.txt`. These are used to track state during commit enumeration, as well as to provide users with a complete list of all hidden and deleted commits (`valid_hidden.txt`). If you'd like to automatically remove these files after scanning, please add the flag `--delete-cached-data`. +In addition to the normal TruffleHog output, the `--object-discovery` flag creates two files in a new `$HOME/.trufflehog` directory: `valid_hidden.txt` and `invalid.txt`. These are used to track state during commit enumeration, as well as to provide users with a complete list of all hidden and deleted commits (`valid_hidden.txt`). If you'd like to automatically remove these files after scanning, please add the flag `--delete-cached-data`. **Note**: Enumerating all valid commits on a repository using this method takes between 20 minutes and a few hours, depending on the size of your repository. We added a progress bar to keep you updated on how long the enumeration will take. 
The actual secret scanning runs extremely fast. -For more information on Cross Fork Object References, please [read our blog post](https://trufflesecurity.com/blog/anyone-can-access-deleted-and-private-repo-data-github). +For more information on Cross Fork Object References, please [read our blog post](https://trufflesecurity.com/blog/anyone-can-access-deleted-and-private-repo-data-github). + +## 16. Scan Hugging Face + +### Scan a Hugging Face Model, Dataset or Space + +```bash +trufflehog huggingface --model <model_id> --space <space_id> --dataset <dataset_id> +``` + +### Scan all Models, Datasets and Spaces belonging to a Hugging Face Organization or User + +```bash +trufflehog huggingface --org <orgname> --user <username> +``` + +(Optionally) When scanning an organization or user, you can skip an entire class of resources with `--skip-models`, `--skip-datasets`, `--skip-spaces` OR a particular resource with `--ignore-models <model_id>`, `--ignore-datasets <dataset_id>`, `--ignore-spaces <space_id>`. + +### Scan Discussion and PR Comments + +```bash +trufflehog huggingface --model <model_id> --include-discussions --include-prs +``` # :question: FAQ diff --git a/pkg/tui/pages/source_select/source_select.go b/pkg/tui/pages/source_select/source_select.go index d58a73031..723bf68d8 100644 --- a/pkg/tui/pages/source_select/source_select.go +++ b/pkg/tui/pages/source_select/source_select.go @@ -65,6 +65,7 @@ func New(c common.Common) *SourceSelect { OssItem("Git", "Scan git repositories."), OssItem("GitHub", "Scan GitHub repositories and/or organizations."), OssItem("Filesystem", "Scan your filesystem by selecting what directories to scan."), + OssItem("Hugging Face", "Scan Hugging Face, an AI/ML community."), OssItem("Jenkins", "Scan Jenkins, a CI/CD platform. 
(Recently open-sourced from enterprise!)"), OssItem("Elasticsearch", "Scan your Elasticsearch cluster or Elastic Cloud instance."), OssItem("Postman", "Scan a collection, workspace, or environment from Postman, the API platform."), diff --git a/pkg/tui/sources/huggingface/huggingface.go b/pkg/tui/sources/huggingface/huggingface.go new file mode 100644 index 000000000..bee86206d --- /dev/null +++ b/pkg/tui/sources/huggingface/huggingface.go @@ -0,0 +1,77 @@ +package huggingface + +import ( + "strings" + + "github.com/trufflesecurity/trufflehog/v3/pkg/tui/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/tui/components/textinputs" +) + +type huggingFaceCmdModel struct { + textinputs.Model +} + +func GetNote() string { + return "Please enter the organization, user, model, space, or dataset you would like to scan." +} + +func GetFields() huggingFaceCmdModel { + org := textinputs.InputConfig{ + Label: "Organization", + Key: "org", + Required: false, + Help: "Hugging Face organization name. This will scan all models, datasets, and spaces belonging to the organization.", + } + user := textinputs.InputConfig{ + Label: "Username", + Key: "user", + Required: false, + Help: "Hugging Face user. This will scan all models, datasets, and spaces belonging to the user.", + } + model := textinputs.InputConfig{ + Label: "Model", + Key: "model", + Required: false, + Help: "Hugging Face model. Example: org/model_name or user/model_name", + } + space := textinputs.InputConfig{ + Label: "Space", + Key: "space", + Required: false, + Help: "Hugging Face space. Example: org/space_name or user/space_name.", + } + dataset := textinputs.InputConfig{ + Label: "Dataset", + Key: "dataset", + Required: false, + Help: "Hugging Face dataset. 
Example: org/dataset_name or user/dataset_name.", + } + + return huggingFaceCmdModel{textinputs.New([]textinputs.InputConfig{org, user, model, space, dataset})} +} + +func (m huggingFaceCmdModel) Cmd() string { + var command []string + command = append(command, "trufflehog", "huggingface") + + inputs := m.GetInputs() + keys := []string{"org", "user", "model", "space", "dataset"} + + for _, key := range keys { + val, ok := inputs[key] + if !ok || val.Value == "" { + continue + } + + command = append(command, "--"+key+"="+val.Value) + } + + return strings.Join(command, " ") +} + +func (m huggingFaceCmdModel) Summary() string { + inputs := m.GetInputs() + labels := m.GetLabels() + keys := []string{"org", "user", "model", "space", "dataset"} + return common.SummarizeSource(keys, inputs, labels) +} diff --git a/pkg/tui/sources/sources.go b/pkg/tui/sources/sources.go index 7d24b672f..47166c43c 100644 --- a/pkg/tui/sources/sources.go +++ b/pkg/tui/sources/sources.go @@ -12,6 +12,7 @@ import ( "github.com/trufflesecurity/trufflehog/v3/pkg/tui/sources/git" "github.com/trufflesecurity/trufflehog/v3/pkg/tui/sources/github" "github.com/trufflesecurity/trufflehog/v3/pkg/tui/sources/gitlab" + "github.com/trufflesecurity/trufflehog/v3/pkg/tui/sources/huggingface" "github.com/trufflesecurity/trufflehog/v3/pkg/tui/sources/jenkins" "github.com/trufflesecurity/trufflehog/v3/pkg/tui/sources/postman" "github.com/trufflesecurity/trufflehog/v3/pkg/tui/sources/s3" @@ -27,6 +28,8 @@ func GetSourceNotes(sourceName string) string { return postman.GetNote() case "elasticsearch": return elasticsearch.GetNote() + case "hugging face": + return huggingface.GetNote() case "jenkins": return jenkins.GetNote() @@ -63,6 +66,8 @@ func GetSourceFields(sourceName string) CmdModel { return github.GetFields() case "gitlab": return gitlab.GetFields() + case "hugging face": + return huggingface.GetFields() case "jenkins": return jenkins.GetFields() case "postman":