mirror of
https://github.com/ArchiveBox/ArchiveBox
synced 2024-11-29 23:50:22 +00:00
Don't be strict on unicode errors
This commit is contained in:
parent
dba423a568
commit
b864c38d9e
1 changed file with 10 additions and 1 deletion
|
@@ -74,7 +74,16 @@ def save_media(link: Link, out_dir: Optional[Path]=None, timeout: int=MEDIA_TIME
|
||||||
# add video description and subtitles to full-text index
|
# add video description and subtitles to full-text index
|
||||||
# Let's try a few different
|
# Let's try a few different
|
||||||
index_texts = [
|
index_texts = [
|
||||||
text_file.read_text(encoding='utf-8').strip()
|
# errors:
|
||||||
|
# * 'strict' to raise a ValueError exception if there is an
|
||||||
|
# encoding error. The default value of None has the same effect.
|
||||||
|
# * 'ignore' ignores errors. Note that ignoring encoding errors
|
||||||
|
# can lead to data loss.
|
||||||
|
# * 'xmlcharrefreplace' is only supported when writing to a
|
||||||
|
# file. Characters not supported by the encoding are replaced with
|
||||||
|
# the appropriate XML character reference &#nnn;.
|
||||||
|
# There are a few more options described in https://docs.python.org/3/library/functions.html#open
|
||||||
|
text_file.read_text(encoding='utf-8', errors='xmlcharrefreplace').strip()
|
||||||
for text_file in (
|
for text_file in (
|
||||||
*output_path.glob('*.description'),
|
*output_path.glob('*.description'),
|
||||||
*output_path.glob('*.srt'),
|
*output_path.glob('*.srt'),
|
||||||
|
|
Loading…
Reference in a new issue