diff --git a/archivebox/config.py b/archivebox/config.py
index aac32756..9988de6b 100644
--- a/archivebox/config.py
+++ b/archivebox/config.py
@@ -886,7 +886,13 @@ def hint(text: Union[Tuple[str, ...], List[str], str], prefix=' ', config: Op
 
 # Dependency Metadata Helpers
 
-def bin_version(binary: Optional[str]) -> Optional[str]:
-    """check the presence and return valid version line of a specified binary"""
+def bin_version(binary: Optional[str], cmd: Optional[str]=None) -> Optional[str]:
+    """check the presence and return valid version line of a specified binary
+
+    `cmd` optionally replaces the default `<abspath> --version` invocation
+    (a non-empty string is run through the shell). The result is the first
+    dotted version number on the first line of output, or failing that the
+    first 3 columns of that line.
+    """
 
     abspath = bin_path(binary)
@@ -895,11 +901,21 @@ def bin_version(binary: Optional[str]) -> Optional[str]:
 
     try:
         bin_env = os.environ | {'LANG': 'C'}
-        version_str = run([abspath, "--version"], stdout=PIPE, env=bin_env).stdout.strip().decode()
+        # a non-empty string cmd is run through the shell; an unset cmd falls back to `<abspath> --version`
+        is_cmd_str = bool(cmd) and isinstance(cmd, str)
+        version_str = run(cmd or [abspath, "--version"], shell=is_cmd_str, stdout=PIPE, stderr=STDOUT, env=bin_env).stdout.strip().decode()
         if not version_str:
-            version_str = run([abspath, "--version"], stdout=PIPE).stdout.strip().decode()
-        # take first 3 columns of first line of version info
-        return ' '.join(version_str.split('\n')[0].strip().split()[:3])
+            # no output with LANG=C forced, retry with the inherited environment
+            version_str = run(cmd or [abspath, "--version"], shell=is_cmd_str, stdout=PIPE, stderr=STDOUT).stdout.strip().decode()
+
+        first_line = version_str.split('\n')[0]
+        # prefer a bare major.minor[.patch] number; the quantifiers must be greedy,
+        # a lazy \d+? on the minor part cuts "1.20.3" down to "1.20"
+        version_match = re.search(r"\d+\.\d+(?:\.\d+)?", first_line)
+        if version_match:
+            return version_match.group()
+        # no dotted number found: take first 3 columns of first line of version info
+        return ' '.join(first_line.strip().split()[:3])
     except OSError:
         pass
         # stderr(f'[X] Unable to find working version of dependency: {binary}', color='red')
diff --git a/archivebox/core/settings.py b/archivebox/core/settings.py
index da03ffd8..ef08643e 100644
--- a/archivebox/core/settings.py
+++ b/archivebox/core/settings.py
@@ -380,21 +380,22 @@ IGNORABLE_404_URLS = [
 ]
 
 class NoisyRequestsFilter(logging.Filter):
-    def filter(self, record):
+    def filter(self, record) -> bool:
+        """return False for ignorable request log lines, True for everything else"""
         logline = record.getMessage()
 
         # ignore harmless 404s for the patterns in IGNORABLE_404_URLS
         for ignorable_url_pattern in IGNORABLE_404_URLS:
             ignorable_log_pattern = re.compile(f'^"GET /.*/?{ignorable_url_pattern.pattern[:-1]} HTTP/.*" (200|30.|404) .+$', re.I | re.M)
             if ignorable_log_pattern.match(logline):
-                return 0
+                return False
 
         # ignore staticfile requests that 200 or 30*
         ignoreable_200_log_pattern = re.compile(r'"GET /static/.* HTTP/.*" (200|30.) .+', re.I | re.M)
         if ignoreable_200_log_pattern.match(logline):
-            return 0
+            return False
 
-        return 1
+        return True
 
 if CONFIG.LOGS_DIR.exists():
     ERROR_LOG = (CONFIG.LOGS_DIR / 'errors.log')