Add comparative analysis with anchore-engine (#78)

* add comparative analysis
* remove extra comma from compare script tuple

Co-authored-by: Alfredo Deza <adeza@anchore.com>
Co-authored-by: Alfredo Deza <adeza@anchore.com>
2024-11-10 14:24:12 +00:00 · 2020-07-13 12:12:00 -04:00 · 2020-07-13 12:12:00 -04:00 · 61f51d80bb
commit 61f51d80bb
parent 3f090f9647
5 changed files with 213 additions and 1 deletions
--- a/Makefile
+++ b/Makefile
@ -22,11 +22,14 @@ define title
    @printf '$(TITLE)$(1)$(RESET)\n'
 endef

-.PHONY: all bootstrap lint lint-fix unit coverage integration check-pipeline clear-cache help test
+.PHONY: all bootstrap lint lint-fix unit coverage integration check-pipeline clear-cache help test compare

 all: lint test ## Run all checks (linting, unit tests, and integration tests)
 	@printf '$(SUCCESS)All checks pass!$(RESET)\n'

+compare: ## Run the package comparison defined in comparison/Makefile
+	@$(MAKE) -C comparison
+
 test: unit integration ## Run all tests (currently unit & integration)

 help:
--- a/comparison/.gitignore
+++ b/comparison/.gitignore
@ -0,0 +1,2 @@
+*.json
+*-reports
--- a/comparison/Dockerfile
+++ b/comparison/Dockerfile
@ -0,0 +1,7 @@
+# Image that runs compare.py against the report directories copied in below.
+FROM python:3
+
+WORKDIR /
+# Both report directories must be present in the build context.
+COPY imgbom-reports /imgbom-reports
+COPY inline-reports /inline-reports
+COPY compare.py .
+# Invoke the interpreter explicitly so the image does not depend on compare.py
+# carrying an executable bit in the build context.
+ENTRYPOINT ["python3", "/compare.py"]
--- a/comparison/Makefile
+++ b/comparison/Makefile
@ -0,0 +1,27 @@
+# Image to compare; override on the command line, e.g. `make IMAGE=alpine:3`.
+IMAGE = "centos:8"
+# Filesystem-safe form of IMAGE (":" replaced by "_"), computed once at parse time.
+IMAGE_CLEAN := $(shell echo $(IMAGE) | tr ":" "_")
+IMGBOM_DIR = imgbom-reports
+IMGBOM_REPORT = $(IMGBOM_DIR)/$(IMAGE_CLEAN).json
+INLINE_DIR = inline-reports
+INLINE_REPORT = $(INLINE_DIR)/$(IMAGE_CLEAN)-content-os.json
+
+# Delete a report whose recipe failed, so a truncated file is never seen as up to date.
+.DELETE_ON_ERROR:
+
+.PHONY: all bootstrap compare
+
+all: compare
+
+# Build the comparison image (it copies both report directories) and run it.
+compare: $(INLINE_REPORT) $(IMGBOM_REPORT)
+	docker build -t compare-imgbom:latest .
+	docker run compare-imgbom:latest $(IMAGE)
+
+# Baseline: run the anchore inline scan and move its reports into INLINE_DIR.
+$(INLINE_REPORT):
+	echo "Creating $(INLINE_REPORT)..."
+	mkdir -p $(INLINE_DIR)
+	curl -s https://ci-tools.anchore.io/inline_scan-v0.7.0 | bash -s -- -p -r $(IMAGE)
+	mv anchore-reports/* $(INLINE_DIR)/
+	rmdir anchore-reports
+
+# Candidate: run imgbom from source against the same image as the baseline.
+$(IMGBOM_REPORT):
+	echo "Creating $(IMGBOM_REPORT)..."
+	mkdir -p $(IMGBOM_DIR)
+	docker pull $(IMAGE)
+	go run ../main.go $(IMAGE) -o json > $(IMGBOM_REPORT)
--- a/comparison/compare.py
+++ b/comparison/compare.py
@ -0,0 +1,173 @@
+#!/usr/bin/env python3
+import os
+import sys
+import json
+import functools
+import collections
+
+# Minimum overall score, as a fraction (0.9 == 90 %), required to pass the gate in main().
+QUALITY_GATE_THRESHOLD = 0.9
+
+# Per-package details compared in addition to identity (currently only the version).
+Metadata = collections.namedtuple("Metadata", "version")
+# Package identity used for the set comparisons: name plus lower-cased type.
+Package = collections.namedtuple("Package", "name type")
+# NOTE(review): Vulnerability is not referenced anywhere in the code shown here.
+Vulnerability = collections.namedtuple("Vulnerability", "cve package")
+
+
+class InlineScan:
+    """Reader for anchore inline-scan content reports stored as JSON files.
+
+    A report is looked up as ``<report_dir>/<image>-<report>.json`` where any
+    ":" in the image reference is replaced by "_".
+    """
+
+    report_tmpl = "{image}-{report}.json"
+
+    def __init__(self, image, report_dir="./"):
+        self.report_dir = report_dir
+        self.image = image
+
+    def packages(self):
+        """Return ``(packages, metadata)`` merged from the python and OS reports.
+
+        ``packages`` is the union of both package sets. ``metadata`` is a plain
+        dict keyed by package type; on a type clash the OS entry replaces the
+        python one.
+        """
+        py_pkgs, py_meta = self._python_packages()
+        os_pkgs, os_meta = self._os_packages()
+        merged_meta = dict(py_meta)
+        merged_meta.update(os_meta)
+        return py_pkgs | os_pkgs, merged_meta
+
+    def _report_path(self, report):
+        """Build the path of the JSON file that holds the named report."""
+        image_slug = self.image.replace(":", "_")
+        file_name = self.report_tmpl.format(image=image_slug, report=report)
+        return os.path.join(self.report_dir, file_name)
+
+    def _enumerate_section(self, report, section):
+        """Lazily yield every entry under ``section`` of the given report."""
+        with open(self._report_path(report=report)) as json_file:
+            yield from json.load(json_file)[section]
+
+    def _collect(self, report):
+        """Read the "content" section of ``report`` into ``(packages, metadata)``."""
+        found = set()
+        by_type = collections.defaultdict(dict)
+        for item in self._enumerate_section(report=report, section="content"):
+            pkg = Package(name=item["package"], type=item["type"].lower())
+            found.add(pkg)
+            by_type[pkg.type][pkg] = Metadata(version=item["version"])
+        return found, by_type
+
+    @functools.lru_cache
+    def _python_packages(self):
+        """Packages and metadata from the ``content-python`` report (cached)."""
+        return self._collect("content-python")
+
+    @functools.lru_cache
+    def _os_packages(self):
+        """Packages and metadata from the ``content-os`` report (cached)."""
+        return self._collect("content-os")
+
+
+class ImgBom:
+    """Reader for the imgbom JSON report of a single image.
+
+    The report is looked up as ``<report_dir>/<image>.json`` where any ":" in
+    the image reference is replaced by "_".
+    """
+
+    report_tmpl = "{image}.json"
+
+    def __init__(self, image, report_dir="./"):
+        file_name = self.report_tmpl.format(image=image.replace(":", "_"))
+        self.report_path = os.path.join(report_dir, file_name)
+
+    def _enumerate_section(self, section):
+        """Lazily yield every entry under ``section`` of the report."""
+        with open(self.report_path) as json_file:
+            yield from json.load(json_file)[section]
+
+    @functools.lru_cache
+    def packages(self):
+        """Return ``(packages, metadata)`` read from the "artifacts" section (cached).
+
+        Types are lower-cased, and "wheel"/"egg" are reported as "python" to
+        line up with the type names used on the inline-scan side.
+        """
+        found = set()
+        by_type = collections.defaultdict(dict)
+        for item in self._enumerate_section(section="artifacts"):
+            # normalize to inline
+            kind = item["type"].lower()
+            kind = "python" if kind in ("wheel", "egg") else kind
+            pkg = Package(name=item["name"], type=kind)
+            found.add(pkg)
+            by_type[pkg.type][pkg] = Metadata(version=item["version"])
+        return found, by_type
+
+
+def main(image):
+    """Compare imgbom's package report for ``image`` with the inline-scan baseline.
+
+    Reads the reports from ``inline-reports/`` and ``imgbom-reports/`` (relative
+    to the working directory), prints a summary and returns the process exit
+    status: 0 when the overall score reaches QUALITY_GATE_THRESHOLD or when
+    neither tool found a package, 1 otherwise (including an empty baseline).
+    """
+    inline = InlineScan(image=image, report_dir="inline-reports")
+    inline_packages, inline_metadata = inline.packages()
+
+    imgbom = ImgBom(image=image, report_dir="imgbom-reports")
+    imgbom_packages, imgbom_metadata = imgbom.packages()
+
+    if len(imgbom_packages) == 0 and len(inline_packages) == 0:
+        print("nobody found any packages")
+        return 0
+
+    if len(inline_packages) == 0:
+        # Every percentage below is relative to the inline (baseline) results.
+        # With an empty baseline the ratios are undefined and used to raise
+        # ZeroDivisionError; fail the gate with a readable message instead.
+        print("Inline Packages: 0")
+        print("Imgbom Packages: %d" % len(imgbom_packages))
+        print("inline scan found no packages, nothing to compare against")
+        return 1
+
+    same_packages = imgbom_packages & inline_packages
+    percent_overlap_packages = (
+        float(len(same_packages)) / float(len(inline_packages))
+    ) * 100.0
+
+    bonus_packages = imgbom_packages - inline_packages
+    missing_packages = inline_packages - imgbom_packages
+
+    # Pair each package with its metadata so a package only counts as a
+    # metadata match when its version agrees as well.
+    inline_metadata_set = {
+        (package, inline_metadata[package.type][package])
+        for package in inline_packages
+    }
+    imgbom_metadata_set = {
+        (package, imgbom_metadata[package.type][package])
+        for package in imgbom_packages
+    }
+
+    same_metadata = imgbom_metadata_set & inline_metadata_set
+    percent_overlap_metadata = (
+        float(len(same_metadata)) / float(len(inline_metadata_set))
+    ) * 100.0
+
+    if len(bonus_packages) > 0:
+        print("Imgbom Bonus packages:")
+        for package in sorted(bonus_packages):
+            print("    " + repr(package))
+        print()
+
+    if len(missing_packages) > 0:
+        print("Imgbom Missing packages:")
+        for package in sorted(missing_packages):
+            print("    " + repr(package))
+        print()
+
+    print("Inline Packages: %d" % len(inline_packages))
+    print("Imgbom Packages: %d" % len(imgbom_packages))
+    print()
+    print(
+        "Baseline Packages Matched: %2.3f %% (%d/%d packages)"
+        % (percent_overlap_packages, len(same_packages), len(inline_packages))
+    )
+    print(
+        "Baseline Metadata Matched: %2.3f %% (%d/%d metadata)"
+        % (percent_overlap_metadata, len(same_metadata), len(inline_metadata_set))
+    )
+
+    # The overall score is the plain average of the two overlap percentages.
+    overall_score = (percent_overlap_packages + percent_overlap_metadata) / 2.0
+
+    print("Overall Score: %2.3f %%" % overall_score)
+
+    if overall_score < (QUALITY_GATE_THRESHOLD * 100):
+        print("failed quality gate (>= %d %%)" % (QUALITY_GATE_THRESHOLD * 100))
+        return 1
+
+    return 0
+
+
+if __name__ == "__main__":
+    # Exactly one positional argument, the image reference, is expected.
+    cli_args = sys.argv[1:]
+    if len(cli_args) != 1:
+        sys.exit("provide an image")
+
+    # main() returns the exit status (0 = gate passed, 1 = gate failed).
+    rc = main(cli_args[0])
+    sys.exit(rc)