mirror of
https://github.com/ArchiveBox/ArchiveBox
synced 2024-11-10 06:34:16 +00:00
feat: Add WGET_ARGS to control wget arguments
This commit is contained in:
parent
65530e1e5b
commit
24e7a74855
3 changed files with 15 additions and 9 deletions
|
@ -120,7 +120,17 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
|
|||
'--audio-format', 'mp3',
|
||||
'--audio-quality', '320K',
|
||||
'--embed-thumbnail',
|
||||
'--add-metadata']}
|
||||
'--add-metadata']},
|
||||
|
||||
'WGET_ARGS': {'type': list, 'default': ['--no-verbose',
|
||||
'--adjust-extension',
|
||||
'--convert-links',
|
||||
'--force-directories',
|
||||
'--backup-converted',
|
||||
'--span-hosts',
|
||||
'--no-parent',
|
||||
'-e', 'robots=off',
|
||||
]}
|
||||
},
|
||||
|
||||
'DEPENDENCY_CONFIG': {
|
||||
|
@ -276,6 +286,7 @@ DERIVED_CONFIG_DEFAULTS: ConfigDefaultDict = {
|
|||
'WGET_USER_AGENT': {'default': lambda c: c['WGET_USER_AGENT'].format(**c)},
|
||||
'SAVE_WGET': {'default': lambda c: c['USE_WGET'] and c['SAVE_WGET']},
|
||||
'SAVE_WARC': {'default': lambda c: c['USE_WGET'] and c['SAVE_WARC']},
|
||||
'WGET_ARGS': {'default': lambda c: c['WGET_ARGS'] or []},
|
||||
|
||||
'USE_SINGLEFILE': {'default': lambda c: c['USE_SINGLEFILE'] and c['SAVE_SINGLEFILE']},
|
||||
'SINGLEFILE_VERSION': {'default': lambda c: bin_version(c['SINGLEFILE_BINARY']) if c['USE_SINGLEFILE'] else None},
|
||||
|
|
|
@ -95,6 +95,7 @@ class ConfigDict(BaseConfig, total=False):
|
|||
CHROME_BINARY: Optional[str]
|
||||
|
||||
YOUTUBEDL_ARGS: Optional[str]
|
||||
WGET_ARGS: Optional[str]
|
||||
|
||||
|
||||
ConfigDefaultValueGetter = Callable[[ConfigDict], ConfigValue]
|
||||
|
|
|
@ -19,6 +19,7 @@ from ..util import (
|
|||
urldecode,
|
||||
)
|
||||
from ..config import (
|
||||
WGET_ARGS,
|
||||
TIMEOUT,
|
||||
SAVE_WGET,
|
||||
SAVE_WARC,
|
||||
|
@ -59,14 +60,7 @@ def save_wget(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) ->
|
|||
cmd = [
|
||||
WGET_BINARY,
|
||||
# '--server-response', # print headers for better error parsing
|
||||
'--no-verbose',
|
||||
'--adjust-extension',
|
||||
'--convert-links',
|
||||
'--force-directories',
|
||||
'--backup-converted',
|
||||
'--span-hosts',
|
||||
'--no-parent',
|
||||
'-e', 'robots=off',
|
||||
*WGET_ARGS,
|
||||
'--timeout={}'.format(timeout),
|
||||
*(['--restrict-file-names={}'.format(RESTRICT_FILE_NAMES)] if RESTRICT_FILE_NAMES else []),
|
||||
*(['--warc-file={}'.format(str(warc_path))] if SAVE_WARC else []),
|
||||
|
|
Loading…
Reference in a new issue