2024-09-21 04:00:54 -07:00
|
|
|
import platform
|
|
|
|
from pathlib import Path
|
2024-09-22 19:30:24 -07:00
|
|
|
from typing import List, Optional, Dict, ClassVar
|
2024-09-21 04:00:54 -07:00
|
|
|
|
|
|
|
from django.conf import settings
|
|
|
|
|
|
|
|
# Depends on other PyPI/vendor packages:
|
|
|
|
from pydantic import InstanceOf, Field
|
|
|
|
from pydantic_pkgr import (
|
|
|
|
BinProvider,
|
|
|
|
BinName,
|
|
|
|
BinProviderName,
|
|
|
|
ProviderLookupDict,
|
|
|
|
bin_abspath,
|
|
|
|
)
|
|
|
|
|
|
|
|
# Depends on other Django apps:
|
|
|
|
from plugantic.base_plugin import BasePlugin
|
|
|
|
from plugantic.base_configset import BaseConfigSet, ConfigSectionName
|
|
|
|
from plugantic.base_binary import BaseBinary, env
|
|
|
|
# from plugantic.base_extractor import BaseExtractor
|
|
|
|
# from plugantic.base_queue import BaseQueue
|
|
|
|
from plugantic.base_hook import BaseHook
|
|
|
|
|
|
|
|
# Depends on Other Plugins:
|
2024-09-24 01:25:55 -07:00
|
|
|
from pkg_plugins.puppeteer.apps import PUPPETEER_BINPROVIDER
|
|
|
|
from pkg_plugins.playwright.apps import PLAYWRIGHT_BINPROVIDER
|
2024-09-21 04:00:54 -07:00
|
|
|
|
|
|
|
|
2024-09-22 13:17:10 -07:00
|
|
|
# Executable names to probe for a Chromium install on Linux, in lookup order.
CHROMIUM_BINARY_NAMES_LINUX = [
    "chromium",
    "chromium-browser",
    "chromium-browser-beta",
    "chromium-browser-unstable",
    "chromium-browser-canary",
    "chromium-browser-dev",
]

# On macOS the executable lives inside the .app bundle, so an absolute path is listed.
CHROMIUM_BINARY_NAMES_MACOS = [
    "/Applications/Chromium.app/Contents/MacOS/Chromium",
]

# All Chromium candidates: Linux names first, then the macOS bundle path.
CHROMIUM_BINARY_NAMES = [*CHROMIUM_BINARY_NAMES_LINUX, *CHROMIUM_BINARY_NAMES_MACOS]
|
|
|
|
|
|
|
|
# Executable names to probe for a Google Chrome install on Linux, in lookup order.
CHROME_BINARY_NAMES_LINUX = [
    "google-chrome",
    "google-chrome-stable",
    "google-chrome-beta",
    "google-chrome-canary",
    "google-chrome-unstable",
    "google-chrome-dev",
    "chrome",
]

# On macOS the executables live inside .app bundles, so absolute paths are listed.
CHROME_BINARY_NAMES_MACOS = [
    "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
    "/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary",
]

# All Chrome candidates: Linux names first, then the macOS bundle paths.
CHROME_BINARY_NAMES = [*CHROME_BINARY_NAMES_LINUX, *CHROME_BINARY_NAMES_MACOS]
|
2024-09-21 04:00:54 -07:00
|
|
|
|
|
|
|
|
|
|
|
def autodetect_system_chrome_install(PATH=None) -> Optional[Path]:
    """
    Find the first Chrome/Chromium executable that resolves on this machine.

    Candidates are tried in order: every entry of CHROME_BINARY_NAMES first,
    then every entry of CHROMIUM_BINARY_NAMES.

    PATH: search path handed to bin_abspath(); when None (the default),
          env.PATH is used instead.

    Returns the first truthy result of bin_abspath(), or None when no
    candidate resolves.
    """
    # honor an explicitly passed PATH; fall back to env.PATH so callers that
    # pass nothing (or pass env.PATH themselves) get the same lookup as before
    search_path = env.PATH if PATH is None else PATH

    for bin_name in CHROME_BINARY_NAMES + CHROMIUM_BINARY_NAMES:
        abspath = bin_abspath(bin_name, PATH=search_path)
        if abspath:
            return abspath
    return None
|
|
|
|
|
2024-09-22 13:17:10 -07:00
|
|
|
def create_macos_app_symlink(target: Path, shortcut: Path):
    """
    on macOS, some binaries are inside of .app, so we need to
    create a tiny bash script instead of a symlink
    (so that ../ parent relationships are relative to original .app instead of callsite dir)

    target:   path of the real executable that the launcher script exec's
    shortcut: path where the launcher script is written (then chmod'ed to 0o777)
    """
    import shlex  # local import: only this helper needs shell quoting

    # TODO: should we enforce this? is it useful in any other situation?
    # if platform.system().lower() != 'darwin':
    #     raise Exception(...)

    # if a symlink is already sitting at `shortcut`, remove it first:
    # write_text() would follow the link and overwrite the file it points to
    if shortcut.is_symlink():
        shortcut.unlink()

    # shell-quote the target so spaces or quote characters in the path
    # cannot break (or inject into) the generated script
    quoted_target = shlex.quote(str(target))
    shortcut.write_text(f"""#!/usr/bin/env bash\nexec {quoted_target} "$@"\n""")
    shortcut.chmod(0o777)  # make sure its executable by everyone
|
|
|
|
|
2024-09-21 04:00:54 -07:00
|
|
|
###################### Config ##########################
|
|
|
|
|
|
|
|
|
|
|
|
class ChromeDependencyConfigs(BaseConfigSet):
    # Config fields for the Chrome dependency, filed under the DEPENDENCY_CONFIG section.
    section: ClassVar[ConfigSectionName] = "DEPENDENCY_CONFIG"

    # binary name; ChromeBinary.name is taken from this value
    CHROME_BINARY: str = Field(default="chrome")

    # NOTE(review): presumably a full replacement for the argument list when set -- confirm
    CHROME_ARGS: Optional[List[str]] = Field(default=None)
    CHROME_EXTRA_ARGS: List[str] = []
    # plain string, not an f-string: the {TIMEOUT-10} placeholder is stored verbatim
    CHROME_DEFAULT_ARGS: List[str] = ["--timeout={TIMEOUT-10}"]

    # Unimplemented sketch of a layered loader, kept for reference:
    #
    # def load(self) -> Self:
    #     # for each field in the model, load its value
    #     # load from each source in order of precedence (lowest to highest):
    #     # - schema default
    #     # - ArchiveBox.conf INI file
    #     # - environment variables
    #     # - command-line arguments
    #
    #     LOADED_VALUES: Dict[str, Any] = {}
    #
    #     for field_name, field in self.__fields__.items():
    #         def_value = field.default_factory() if field.default_factory else field.default
    #         ini_value = settings.INI_CONFIG.get_value(field_name)
    #         env_value = settings.ENV_CONFIG.get_value(field_name)
    #         cli_value = settings.CLI_CONFIG.get_value(field_name)
    #         run_value = settings.RUN_CONFIG.get_value(field_name)
    #         value = run_value or cli_value or env_value or ini_value or def_value
|
2024-09-21 04:00:54 -07:00
|
|
|
|
|
|
|
class ChromeConfigs(ChromeDependencyConfigs):
    # Aggregate config set for the plugin; currently adds nothing on top of
    # ChromeDependencyConfigs.

    # section: ConfigSectionName = 'ALL_CONFIGS'
    pass
|
|
|
|
|
|
|
|
# Keyword overrides passed to ChromeConfigs at construction time (none at present).
DEFAULT_GLOBAL_CONFIG = {}

# Module-level config instance used by the rest of this plugin.
CHROME_CONFIG = ChromeConfigs(**DEFAULT_GLOBAL_CONFIG)
|
|
|
|
|
|
|
|
|
|
|
|
class ChromeBinary(BaseBinary):
    # Binary definition for the Chrome/Chromium browser: declares which
    # providers may supply it and how each provider should look it up.
    name: BinName = CHROME_CONFIG.CHROME_BINARY
    binproviders_supported: List[InstanceOf[BinProvider]] = [PUPPETEER_BINPROVIDER, env, PLAYWRIGHT_BINPROVIDER]

    provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
        env.name: {
            'abspath': lambda: autodetect_system_chrome_install(PATH=env.PATH),  # /usr/bin/google-chrome-stable
        },
        PUPPETEER_BINPROVIDER.name: {
            'packages': lambda: ['chrome@stable'],  # npx @puppeteer/browsers install chrome@stable
        },
        PLAYWRIGHT_BINPROVIDER.name: {
            'packages': lambda: ['chromium'],  # playwright install chromium
        },
    }

    @staticmethod
    def symlink_to_lib(binary, bin_dir=settings.CONFIG.BIN_DIR) -> None:
        """
        Expose an installed browser binary as `bin_dir / binary.name`.

        binary:  object providing `.abspath` (path of the real executable) and `.name`
        bin_dir: directory that receives the link; created if missing

        Does nothing when `binary.abspath` is unset or does not exist on disk.
        Safe to call repeatedly: an existing link is replaced instead of
        raising FileExistsError.
        """
        if not (binary.abspath and binary.abspath.exists()):
            return

        bin_dir.mkdir(parents=True, exist_ok=True)
        symlink = bin_dir / binary.name

        # the binary already lives at the link path: nothing to create, and
        # replacing it would leave a link (or launcher) that points at itself
        if symlink == binary.abspath:
            return

        if platform.system().lower() == 'darwin':
            # if on macOS, browser binary is inside a .app, so we need to create a tiny bash script instead of a symlink
            create_macos_app_symlink(binary.abspath, symlink)
        else:
            # otherwise on linux we can symlink directly to binary executable
            # (drop any link left over from a previous run first, since
            # symlink_to() raises FileExistsError if the path is already taken)
            symlink.unlink(missing_ok=True)
            symlink.symlink_to(binary.abspath)
|
|
|
|
|
|
|
|
|
|
|
|
# Module-level ChromeBinary instance shared by the plugin definition below.
CHROME_BINARY = ChromeBinary()

# Binaries exported by this plugin module.
PLUGIN_BINARIES = [
    CHROME_BINARY,
]
|
|
|
|
|
|
|
|
class ChromePlugin(BasePlugin):
    # Plugin definition bundling the Chrome config set and the Chrome binary.
    app_label: str = "chrome"
    verbose_name: str = "Chrome Browser"

    # hooks contributed by this plugin: config first, then the binary
    hooks: List[InstanceOf[BaseHook]] = [CHROME_CONFIG, CHROME_BINARY]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Instantiate the plugin and register it against the Django settings at import time.
PLUGIN = ChromePlugin()
PLUGIN.register(settings)

# Expose the plugin's AppConfig under the DJANGO_APP name.
DJANGO_APP = PLUGIN.AppConfig
|