fix extractor path calculation

This commit is contained in:
Nick Sweeting 2024-10-01 21:44:56 -07:00
parent 8498ca5c64
commit 276a505cae
No known key found for this signature in database
2 changed files with 4 additions and 4 deletions

View file

@@ -40,7 +40,7 @@ BUILTIN_PLUGIN_DIRS = {
'plugins_extractor': PACKAGE_DIR / 'plugins_extractor',
}
# Plugin directories located under DATA_DIR, keyed by a short label.
# The key is listed exactly once: a repeated key in a dict literal is
# silently overwritten by the later entry, so the earlier one was dead.
USER_PLUGIN_DIRS = {
    'user_plugins': DATA_DIR / 'user_plugins',
}
# NOTE(review): presumably loads the plugins found in the built-in dirs —
# confirm against abx.get_plugins_in_dirs.
BUILTIN_PLUGINS = abx.get_plugins_in_dirs(BUILTIN_PLUGIN_DIRS)

View file

@@ -86,7 +86,7 @@ WGET_BINARY = WgetBinary()
class WgetExtractor(BaseExtractor):
name: ExtractorName = 'wget'
binary: str = WGET_BINARY.name
binary: BinName = WGET_BINARY.name
def get_output_path(self, snapshot) -> Path | None:
wget_index_path = wget_output_path(snapshot.as_link())
@@ -99,10 +99,10 @@ WGET_EXTRACTOR = WgetExtractor()
class WarcExtractor(BaseExtractor):
name: ExtractorName = 'warc'
binary: str = WGET_BINARY.name
binary: BinName = WGET_BINARY.name
def get_output_path(self, snapshot) -> Path | None:
warc_files = (snapshot.link_dir / 'warc').glob('*.warc.gz')
warc_files = list((Path(snapshot.link_dir) / 'warc').glob('*.warc.gz'))
if warc_files:
return sorted(warc_files, key=lambda x: x.stat().st_size, reverse=True)[0]
return None