mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-01-07 18:59:08 +00:00
Merge pull request #9367 from codesparkle/master
Feature: --restrict-filenames: replace accented characters by their unaccented counterpart instead of "_"
This commit is contained in:
commit
109db8ea64
2 changed files with 15 additions and 4 deletions
|
@ -139,8 +139,8 @@ class TestUtil(unittest.TestCase):
|
||||||
self.assertEqual('yes_no', sanitize_filename('yes? no', restricted=True))
|
self.assertEqual('yes_no', sanitize_filename('yes? no', restricted=True))
|
||||||
self.assertEqual('this_-_that', sanitize_filename('this: that', restricted=True))
|
self.assertEqual('this_-_that', sanitize_filename('this: that', restricted=True))
|
||||||
|
|
||||||
tests = 'a\xe4b\u4e2d\u56fd\u7684c'
|
tests = 'aäb\u4e2d\u56fd\u7684c'
|
||||||
self.assertEqual(sanitize_filename(tests, restricted=True), 'a_b_c')
|
self.assertEqual(sanitize_filename(tests, restricted=True), 'aab_c')
|
||||||
self.assertTrue(sanitize_filename('\xf6', restricted=True) != '') # No empty filename
|
self.assertTrue(sanitize_filename('\xf6', restricted=True) != '') # No empty filename
|
||||||
|
|
||||||
forbidden = '"\0\\/&!: \'\t\n()[]{}$;`^,#'
|
forbidden = '"\0\\/&!: \'\t\n()[]{}$;`^,#'
|
||||||
|
@ -155,6 +155,10 @@ class TestUtil(unittest.TestCase):
|
||||||
self.assertTrue(sanitize_filename('-', restricted=True) != '')
|
self.assertTrue(sanitize_filename('-', restricted=True) != '')
|
||||||
self.assertTrue(sanitize_filename(':', restricted=True) != '')
|
self.assertTrue(sanitize_filename(':', restricted=True) != '')
|
||||||
|
|
||||||
|
self.assertEqual(sanitize_filename(
|
||||||
|
'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ', restricted=True),
|
||||||
|
'AAAAAAAECEEEEIIIIDNOOOOOOUUUUYPssaaaaaaaeceeeeiiiionoooooouuuuypy')
|
||||||
|
|
||||||
def test_sanitize_ids(self):
|
def test_sanitize_ids(self):
|
||||||
self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw')
|
self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw')
|
||||||
self.assertEqual(sanitize_filename('_BD_eEpuzXw', is_id=True), '_BD_eEpuzXw')
|
self.assertEqual(sanitize_filename('_BD_eEpuzXw', is_id=True), '_BD_eEpuzXw')
|
||||||
|
|
|
@ -14,8 +14,8 @@ import email.utils
|
||||||
import errno
|
import errno
|
||||||
import functools
|
import functools
|
||||||
import gzip
|
import gzip
|
||||||
import itertools
|
|
||||||
import io
|
import io
|
||||||
|
import itertools
|
||||||
import json
|
import json
|
||||||
import locale
|
import locale
|
||||||
import math
|
import math
|
||||||
|
@ -24,8 +24,8 @@ import os
|
||||||
import pipes
|
import pipes
|
||||||
import platform
|
import platform
|
||||||
import re
|
import re
|
||||||
import ssl
|
|
||||||
import socket
|
import socket
|
||||||
|
import ssl
|
||||||
import struct
|
import struct
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
|
@ -89,6 +89,11 @@ KNOWN_EXTENSIONS = (
|
||||||
'wav',
|
'wav',
|
||||||
'f4f', 'f4m', 'm3u8', 'smil')
|
'f4f', 'f4m', 'm3u8', 'smil')
|
||||||
|
|
||||||
|
# needed for sanitizing filenames in restricted mode
|
||||||
|
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ',
|
||||||
|
itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOUUUUYP', ['ss'],
|
||||||
|
'aaaaaa', ['ae'], 'ceeeeiiiionoooooouuuuypy')))
|
||||||
|
|
||||||
|
|
||||||
def preferredencoding():
|
def preferredencoding():
|
||||||
"""Get preferred encoding.
|
"""Get preferred encoding.
|
||||||
|
@ -365,6 +370,8 @@ def sanitize_filename(s, restricted=False, is_id=False):
|
||||||
Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
|
Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
|
||||||
"""
|
"""
|
||||||
def replace_insane(char):
|
def replace_insane(char):
|
||||||
|
if restricted and char in ACCENT_CHARS:
|
||||||
|
return ACCENT_CHARS[char]
|
||||||
if char == '?' or ord(char) < 32 or ord(char) == 127:
|
if char == '?' or ord(char) < 32 or ord(char) == 127:
|
||||||
return ''
|
return ''
|
||||||
elif char == '"':
|
elif char == '"':
|
||||||
|
|
Loading…
Reference in a new issue