diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml
index 7acaee1e83..049faf3738 100644
--- a/.github/workflows/core.yml
+++ b/.github/workflows/core.yml
@@ -32,7 +32,7 @@ jobs:
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- - name: Install pytest
+ - name: Install dependencies
run: pip install pytest -r requirements.txt
- name: Run tests
continue-on-error: False
diff --git a/README.md b/README.md
index dd4652d43a..3b7432474d 100644
--- a/README.md
+++ b/README.md
@@ -157,6 +157,7 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu
* yt-dlp's sanitization of invalid characters in filenames is different/smarter than in youtube-dl. You can use `--compat-options filename-sanitization` to revert to youtube-dl's behavior
* yt-dlp tries to parse the external downloader outputs into the standard progress output if possible (Currently implemented: [~~aria2c~~](https://github.com/yt-dlp/yt-dlp/issues/5931)). You can use `--compat-options no-external-downloader-progress` to get the downloader output as-is
* yt-dlp versions between 2021.09.01 and 2023.01.02 applies `--match-filter` to nested playlists. This was an unintentional side-effect of [8f18ac](https://github.com/yt-dlp/yt-dlp/commit/8f18aca8717bb0dd49054555af8d386e5eda3a88) and is fixed in [d7b460](https://github.com/yt-dlp/yt-dlp/commit/d7b460d0e5fc710950582baed2e3fc616ed98a80). Use `--compat-options playlist-match-filter` to revert this
+* yt-dlp uses modern HTTP client backends such as `requests`. Use `--compat-options prefer-legacy-http-handler` to prefer the legacy HTTP handler (`urllib`) for standard HTTP requests.
For ease of use, a few more compat options are available:
@@ -164,7 +165,7 @@ For ease of use, a few more compat options are available:
* `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams,-playlist-match-filter`
* `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect,-playlist-match-filter`
* `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date`
-* `--compat-options 2022`: Same as `--compat-options playlist-match-filter,no-external-downloader-progress`. Use this to enable all future compat options
+* `--compat-options 2022`: Same as `--compat-options playlist-match-filter,no-external-downloader-progress,prefer-legacy-http-handler`. Use this to enable all future compat options
# INSTALLATION
@@ -274,6 +275,7 @@ While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly
* [**certifi**](https://github.com/certifi/python-certifi)\* - Provides Mozilla's root certificate bundle. Licensed under [MPLv2](https://github.com/certifi/python-certifi/blob/master/LICENSE)
* [**brotli**](https://github.com/google/brotli)\* or [**brotlicffi**](https://github.com/python-hyper/brotlicffi) - [Brotli](https://en.wikipedia.org/wiki/Brotli) content encoding support. Both licensed under MIT [1](https://github.com/google/brotli/blob/master/LICENSE) [2](https://github.com/python-hyper/brotlicffi/blob/master/LICENSE)
* [**websockets**](https://github.com/aaugustin/websockets)\* - For downloading over websocket. Licensed under [BSD-3-Clause](https://github.com/aaugustin/websockets/blob/main/LICENSE)
+* [**requests**](https://github.com/psf/requests)\* - HTTP library. Used for HTTPS proxy and persistent connection support. Licensed under [Apache-2.0](https://github.com/psf/requests/blob/main/LICENSE)
### Metadata
diff --git a/requirements.txt b/requirements.txt
index dde37120f7..112c30aeb7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,3 +4,5 @@ websockets
brotli; platform_python_implementation=='CPython'
brotlicffi; platform_python_implementation!='CPython'
certifi
+requests>=2.31.0,<3
+urllib3>=1.26.17,<3
\ No newline at end of file
diff --git a/setup.py b/setup.py
index a2f9f55c36..1740db27d8 100644
--- a/setup.py
+++ b/setup.py
@@ -62,7 +62,14 @@ def py2exe_params():
'compressed': 1,
'optimize': 2,
'dist_dir': './dist',
- 'excludes': ['Crypto', 'Cryptodome'], # py2exe cannot import Crypto
+ 'excludes': [
+ # py2exe cannot import Crypto
+ 'Crypto',
+ 'Cryptodome',
+ # py2exe appears to confuse this with our socks library.
+ # We don't use pysocks and urllib3.contrib.socks would fail to import if tried.
+ 'urllib3.contrib.socks'
+ ],
'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'],
# Modules that are only imported dynamically must be added here
'includes': ['yt_dlp.compat._legacy', 'yt_dlp.compat._deprecated',
diff --git a/test/test_networking.py b/test/test_networking.py
index 5308c8d6fa..2b45deac79 100644
--- a/test/test_networking.py
+++ b/test/test_networking.py
@@ -28,7 +28,7 @@ from http.cookiejar import CookieJar
from test.helper import FakeYDL, http_server_port
from yt_dlp.cookies import YoutubeDLCookieJar
-from yt_dlp.dependencies import brotli
+from yt_dlp.dependencies import brotli, requests, urllib3
from yt_dlp.networking import (
HEADRequest,
PUTRequest,
@@ -43,6 +43,7 @@ from yt_dlp.networking.exceptions import (
HTTPError,
IncompleteRead,
NoSupportingHandlers,
+ ProxyError,
RequestError,
SSLError,
TransportError,
@@ -305,7 +306,7 @@ class TestRequestHandlerBase:
class TestHTTPRequestHandler(TestRequestHandlerBase):
- @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_verify_cert(self, handler):
with handler() as rh:
with pytest.raises(CertificateVerifyError):
@@ -316,7 +317,7 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
assert r.status == 200
r.close()
- @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_ssl_error(self, handler):
# HTTPS server with too old TLS version
# XXX: is there a better way to test this than to create a new server?
@@ -334,7 +335,7 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers'))
assert not issubclass(exc_info.type, CertificateVerifyError)
- @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_percent_encode(self, handler):
with handler() as rh:
# Unicode characters should be encoded with uppercase percent-encoding
@@ -346,7 +347,7 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
assert res.status == 200
res.close()
- @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_remove_dot_segments(self, handler):
with handler() as rh:
# This isn't a comprehensive test,
@@ -361,14 +362,14 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
assert res.url == f'http://127.0.0.1:{self.http_port}/headers'
res.close()
- @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_unicode_path_redirection(self, handler):
with handler() as rh:
r = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/302-non-ascii-redirect'))
assert r.url == f'http://127.0.0.1:{self.http_port}/%E4%B8%AD%E6%96%87.html'
r.close()
- @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_raise_http_error(self, handler):
with handler() as rh:
for bad_status in (400, 500, 599, 302):
@@ -378,7 +379,7 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
# Should not raise an error
validate_and_send(rh, Request('http://127.0.0.1:%d/gen_200' % self.http_port)).close()
- @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_response_url(self, handler):
with handler() as rh:
# Response url should be that of the last url in redirect chain
@@ -389,7 +390,7 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
assert res2.url == f'http://127.0.0.1:{self.http_port}/gen_200'
res2.close()
- @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_redirect(self, handler):
with handler() as rh:
def do_req(redirect_status, method, assert_no_content=False):
@@ -444,7 +445,7 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
with pytest.raises(HTTPError):
do_req(code, 'GET')
- @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_request_cookie_header(self, handler):
# We should accept a Cookie header being passed as in normal headers and handle it appropriately.
with handler() as rh:
@@ -476,19 +477,19 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
assert b'Cookie: test=ytdlp' not in data
assert b'Cookie: test=test' in data
- @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_redirect_loop(self, handler):
with handler() as rh:
with pytest.raises(HTTPError, match='redirect loop'):
validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_loop'))
- @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_incompleteread(self, handler):
with handler(timeout=2) as rh:
with pytest.raises(IncompleteRead):
validate_and_send(rh, Request('http://127.0.0.1:%d/incompleteread' % self.http_port)).read()
- @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_cookies(self, handler):
cookiejar = YoutubeDLCookieJar()
cookiejar.set_cookie(http.cookiejar.Cookie(
@@ -505,7 +506,7 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={'cookiejar': cookiejar})).read()
assert b'Cookie: test=ytdlp' in data
- @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_headers(self, handler):
with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh:
@@ -521,7 +522,7 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
assert b'Test2: test2' not in data
assert b'Test3: test3' in data
- @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_timeout(self, handler):
with handler() as rh:
# Default timeout is 20 seconds, so this should go through
@@ -537,7 +538,7 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
validate_and_send(
rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1', extensions={'timeout': 4}))
- @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_source_address(self, handler):
source_address = f'127.0.0.{random.randint(5, 255)}'
with handler(source_address=source_address) as rh:
@@ -545,13 +546,13 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
rh, Request(f'http://127.0.0.1:{self.http_port}/source_address')).read().decode()
assert source_address == data
- @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_gzip_trailing_garbage(self, handler):
with handler() as rh:
data = validate_and_send(rh, Request(f'http://localhost:{self.http_port}/trailing_garbage')).read().decode()
assert data == ''
- @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
@pytest.mark.skipif(not brotli, reason='brotli support is not installed')
def test_brotli(self, handler):
with handler() as rh:
@@ -562,7 +563,7 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
assert res.headers.get('Content-Encoding') == 'br'
assert res.read() == b''
- @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_deflate(self, handler):
with handler() as rh:
res = validate_and_send(
@@ -572,7 +573,7 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
assert res.headers.get('Content-Encoding') == 'deflate'
assert res.read() == b''
- @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_gzip(self, handler):
with handler() as rh:
res = validate_and_send(
@@ -582,7 +583,7 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
assert res.headers.get('Content-Encoding') == 'gzip'
assert res.read() == b''
- @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_multiple_encodings(self, handler):
with handler() as rh:
for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'):
@@ -593,7 +594,7 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
assert res.headers.get('Content-Encoding') == pair
assert res.read() == b''
- @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_unsupported_encoding(self, handler):
with handler() as rh:
res = validate_and_send(
@@ -603,7 +604,7 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
assert res.headers.get('Content-Encoding') == 'unsupported'
assert res.read() == b'raw'
- @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_read(self, handler):
with handler() as rh:
res = validate_and_send(
@@ -633,7 +634,7 @@ class TestHTTPProxy(TestRequestHandlerBase):
cls.geo_proxy_thread.daemon = True
cls.geo_proxy_thread.start()
- @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_http_proxy(self, handler):
http_proxy = f'http://127.0.0.1:{self.proxy_port}'
geo_proxy = f'http://127.0.0.1:{self.geo_port}'
@@ -659,7 +660,7 @@ class TestHTTPProxy(TestRequestHandlerBase):
assert res != f'normal: {real_url}'
assert 'Accept' in res
- @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_noproxy(self, handler):
with handler(proxies={'proxy': f'http://127.0.0.1:{self.proxy_port}'}) as rh:
# NO_PROXY
@@ -669,7 +670,7 @@ class TestHTTPProxy(TestRequestHandlerBase):
'utf-8')
assert 'Accept' in nop_response
- @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_allproxy(self, handler):
url = 'http://foo.com/bar'
with handler() as rh:
@@ -677,7 +678,7 @@ class TestHTTPProxy(TestRequestHandlerBase):
'utf-8')
assert response == f'normal: {url}'
- @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_http_proxy_with_idn(self, handler):
with handler(proxies={
'http': f'http://127.0.0.1:{self.proxy_port}',
@@ -715,27 +716,27 @@ class TestClientCertificate:
) as rh:
validate_and_send(rh, Request(f'https://127.0.0.1:{self.port}/video.html')).read().decode()
- @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_certificate_combined_nopass(self, handler):
self._run_test(handler, client_cert={
'client_certificate': os.path.join(self.certdir, 'clientwithkey.crt'),
})
- @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_certificate_nocombined_nopass(self, handler):
self._run_test(handler, client_cert={
'client_certificate': os.path.join(self.certdir, 'client.crt'),
'client_certificate_key': os.path.join(self.certdir, 'client.key'),
})
- @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_certificate_combined_pass(self, handler):
self._run_test(handler, client_cert={
'client_certificate': os.path.join(self.certdir, 'clientwithencryptedkey.crt'),
'client_certificate_password': 'foobar',
})
- @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_certificate_nocombined_pass(self, handler):
self._run_test(handler, client_cert={
'client_certificate': os.path.join(self.certdir, 'client.crt'),
@@ -819,6 +820,75 @@ class TestUrllibRequestHandler(TestRequestHandlerBase):
assert not isinstance(exc_info.value, TransportError)
+class TestRequestsRequestHandler(TestRequestHandlerBase):
+ @pytest.mark.parametrize('raised,expected', [
+ (lambda: requests.exceptions.ConnectTimeout(), TransportError),
+ (lambda: requests.exceptions.ReadTimeout(), TransportError),
+ (lambda: requests.exceptions.Timeout(), TransportError),
+ (lambda: requests.exceptions.ConnectionError(), TransportError),
+ (lambda: requests.exceptions.ProxyError(), ProxyError),
+ (lambda: requests.exceptions.SSLError('12[CERTIFICATE_VERIFY_FAILED]34'), CertificateVerifyError),
+ (lambda: requests.exceptions.SSLError(), SSLError),
+ (lambda: requests.exceptions.InvalidURL(), RequestError),
+ (lambda: requests.exceptions.InvalidHeader(), RequestError),
+ # catch-all: https://github.com/psf/requests/blob/main/src/requests/adapters.py#L535
+ (lambda: urllib3.exceptions.HTTPError(), TransportError),
+ (lambda: requests.exceptions.RequestException(), RequestError)
+ # (lambda: requests.exceptions.TooManyRedirects(), HTTPError) - Needs a response object
+ ])
+ @pytest.mark.parametrize('handler', ['Requests'], indirect=True)
+ def test_request_error_mapping(self, handler, monkeypatch, raised, expected):
+ with handler() as rh:
+ def mock_get_instance(*args, **kwargs):
+ class MockSession:
+ def request(self, *args, **kwargs):
+ raise raised()
+ return MockSession()
+
+ monkeypatch.setattr(rh, '_get_instance', mock_get_instance)
+
+ with pytest.raises(expected) as exc_info:
+ rh.send(Request('http://fake'))
+
+ assert exc_info.type is expected
+
+ @pytest.mark.parametrize('raised,expected,match', [
+ (lambda: urllib3.exceptions.SSLError(), SSLError, None),
+ (lambda: urllib3.exceptions.TimeoutError(), TransportError, None),
+ (lambda: urllib3.exceptions.ReadTimeoutError(None, None, None), TransportError, None),
+ (lambda: urllib3.exceptions.ProtocolError(), TransportError, None),
+ (lambda: urllib3.exceptions.DecodeError(), TransportError, None),
+ (lambda: urllib3.exceptions.HTTPError(), TransportError, None), # catch-all
+ (
+ lambda: urllib3.exceptions.ProtocolError('error', http.client.IncompleteRead(partial=b'abc', expected=4)),
+ IncompleteRead,
+ '3 bytes read, 4 more expected'
+ ),
+ (
+ lambda: urllib3.exceptions.IncompleteRead(partial=3, expected=5),
+ IncompleteRead,
+ '3 bytes read, 5 more expected'
+ ),
+ ])
+ @pytest.mark.parametrize('handler', ['Requests'], indirect=True)
+ def test_response_error_mapping(self, handler, monkeypatch, raised, expected, match):
+ from urllib3.response import HTTPResponse as Urllib3Response
+ from requests.models import Response as RequestsResponse
+ from yt_dlp.networking._requests import RequestsResponseAdapter
+ requests_res = RequestsResponse()
+ requests_res.raw = Urllib3Response(body=b'', status=200)
+ res = RequestsResponseAdapter(requests_res)
+
+ def mock_read(*args, **kwargs):
+ raise raised()
+ monkeypatch.setattr(res.fp, 'read', mock_read)
+
+ with pytest.raises(expected, match=match) as exc_info:
+ res.read()
+
+ assert exc_info.type is expected
+
+
def run_validation(handler, error, req, **handler_kwargs):
with handler(**handler_kwargs) as rh:
if error:
@@ -855,6 +925,10 @@ class TestRequestHandlerValidation:
('file', UnsupportedRequest, {}),
('file', False, {'enable_file_urls': True}),
]),
+ ('Requests', [
+ ('http', False, {}),
+ ('https', False, {}),
+ ]),
(NoCheckRH, [('http', False, {})]),
(ValidationRH, [('http', UnsupportedRequest, {})])
]
@@ -870,6 +944,14 @@ class TestRequestHandlerValidation:
('socks5h', False),
('socks', UnsupportedRequest),
]),
+ ('Requests', [
+ ('http', False),
+ ('https', False),
+ ('socks4', False),
+ ('socks4a', False),
+ ('socks5', False),
+ ('socks5h', False),
+ ]),
(NoCheckRH, [('http', False)]),
(HTTPSupportedRH, [('http', UnsupportedRequest)]),
]
@@ -880,6 +962,10 @@ class TestRequestHandlerValidation:
('all', False),
('unrelated', False),
]),
+ ('Requests', [
+ ('all', False),
+ ('unrelated', False),
+ ]),
(NoCheckRH, [('all', False)]),
(HTTPSupportedRH, [('all', UnsupportedRequest)]),
(HTTPSupportedRH, [('no', UnsupportedRequest)]),
@@ -894,6 +980,13 @@ class TestRequestHandlerValidation:
({'timeout': 'notatimeout'}, AssertionError),
({'unsupported': 'value'}, UnsupportedRequest),
]),
+ ('Requests', [
+ ({'cookiejar': 'notacookiejar'}, AssertionError),
+ ({'cookiejar': YoutubeDLCookieJar()}, False),
+ ({'timeout': 1}, False),
+ ({'timeout': 'notatimeout'}, AssertionError),
+ ({'unsupported': 'value'}, UnsupportedRequest),
+ ]),
(NoCheckRH, [
({'cookiejar': 'notacookiejar'}, False),
({'somerandom': 'test'}, False), # but any extension is allowed through
@@ -909,7 +1002,7 @@ class TestRequestHandlerValidation:
def test_url_scheme(self, handler, scheme, fail, handler_kwargs):
run_validation(handler, fail, Request(f'{scheme}://'), **(handler_kwargs or {}))
- @pytest.mark.parametrize('handler,fail', [('Urllib', False)], indirect=['handler'])
+ @pytest.mark.parametrize('handler,fail', [('Urllib', False), ('Requests', False)], indirect=['handler'])
def test_no_proxy(self, handler, fail):
run_validation(handler, fail, Request('http://', proxies={'no': '127.0.0.1,github.com'}))
run_validation(handler, fail, Request('http://'), proxies={'no': '127.0.0.1,github.com'})
@@ -932,13 +1025,13 @@ class TestRequestHandlerValidation:
run_validation(handler, fail, Request('http://', proxies={'http': f'{scheme}://example.com'}))
run_validation(handler, fail, Request('http://'), proxies={'http': f'{scheme}://example.com'})
- @pytest.mark.parametrize('handler', ['Urllib', HTTPSupportedRH], indirect=True)
+ @pytest.mark.parametrize('handler', ['Urllib', HTTPSupportedRH, 'Requests'], indirect=True)
def test_empty_proxy(self, handler):
run_validation(handler, False, Request('http://', proxies={'http': None}))
run_validation(handler, False, Request('http://'), proxies={'http': None})
@pytest.mark.parametrize('proxy_url', ['//example.com', 'example.com', '127.0.0.1', '/a/b/c'])
- @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
+ @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_invalid_proxy_url(self, handler, proxy_url):
run_validation(handler, UnsupportedRequest, Request('http://', proxies={'http': proxy_url}))
@@ -1242,6 +1335,13 @@ class TestYoutubeDLNetworking:
rh = self.build_handler(ydl, UrllibRH)
assert rh.enable_file_urls is True
+ def test_compat_opt_prefer_urllib(self):
+ # This assumes urllib only has a preference when this compat opt is given
+ with FakeYDL({'compat_opts': ['prefer-legacy-http-handler']}) as ydl:
+ director = ydl.build_request_director([UrllibRH])
+ assert len(director.preferences) == 1
+ assert director.preferences.pop()(UrllibRH, None)
+
class TestRequest:
diff --git a/test/test_socks.py b/test/test_socks.py
index 211ee814d1..d8ac88dad5 100644
--- a/test/test_socks.py
+++ b/test/test_socks.py
@@ -263,7 +263,7 @@ def ctx(request):
class TestSocks4Proxy:
- @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
+ @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
def test_socks4_no_auth(self, handler, ctx):
with handler() as rh:
with ctx.socks_server(Socks4ProxyHandler) as server_address:
@@ -271,7 +271,7 @@ class TestSocks4Proxy:
rh, proxies={'all': f'socks4://{server_address}'})
assert response['version'] == 4
- @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
+ @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
def test_socks4_auth(self, handler, ctx):
with handler() as rh:
with ctx.socks_server(Socks4ProxyHandler, user_id='user') as server_address:
@@ -281,7 +281,7 @@ class TestSocks4Proxy:
rh, proxies={'all': f'socks4://user:@{server_address}'})
assert response['version'] == 4
- @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
+ @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
def test_socks4a_ipv4_target(self, handler, ctx):
with ctx.socks_server(Socks4ProxyHandler) as server_address:
with handler(proxies={'all': f'socks4a://{server_address}'}) as rh:
@@ -289,7 +289,7 @@ class TestSocks4Proxy:
assert response['version'] == 4
assert (response['ipv4_address'] == '127.0.0.1') != (response['domain_address'] == '127.0.0.1')
- @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
+ @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
def test_socks4a_domain_target(self, handler, ctx):
with ctx.socks_server(Socks4ProxyHandler) as server_address:
with handler(proxies={'all': f'socks4a://{server_address}'}) as rh:
@@ -298,7 +298,7 @@ class TestSocks4Proxy:
assert response['ipv4_address'] is None
assert response['domain_address'] == 'localhost'
- @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
+ @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
def test_ipv4_client_source_address(self, handler, ctx):
with ctx.socks_server(Socks4ProxyHandler) as server_address:
source_address = f'127.0.0.{random.randint(5, 255)}'
@@ -308,7 +308,7 @@ class TestSocks4Proxy:
assert response['client_address'][0] == source_address
assert response['version'] == 4
- @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
+ @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
@pytest.mark.parametrize('reply_code', [
Socks4CD.REQUEST_REJECTED_OR_FAILED,
Socks4CD.REQUEST_REJECTED_CANNOT_CONNECT_TO_IDENTD,
@@ -320,7 +320,7 @@ class TestSocks4Proxy:
with pytest.raises(ProxyError):
ctx.socks_info_request(rh)
- @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
+ @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
def test_ipv6_socks4_proxy(self, handler, ctx):
with ctx.socks_server(Socks4ProxyHandler, bind_ip='::1') as server_address:
with handler(proxies={'all': f'socks4://{server_address}'}) as rh:
@@ -329,7 +329,7 @@ class TestSocks4Proxy:
assert response['ipv4_address'] == '127.0.0.1'
assert response['version'] == 4
- @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
+ @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
def test_timeout(self, handler, ctx):
with ctx.socks_server(Socks4ProxyHandler, sleep=2) as server_address:
with handler(proxies={'all': f'socks4://{server_address}'}, timeout=0.5) as rh:
@@ -339,7 +339,7 @@ class TestSocks4Proxy:
class TestSocks5Proxy:
- @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
+ @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
def test_socks5_no_auth(self, handler, ctx):
with ctx.socks_server(Socks5ProxyHandler) as server_address:
with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
@@ -347,7 +347,7 @@ class TestSocks5Proxy:
assert response['auth_methods'] == [0x0]
assert response['version'] == 5
- @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
+ @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
def test_socks5_user_pass(self, handler, ctx):
with ctx.socks_server(Socks5ProxyHandler, auth=('test', 'testpass')) as server_address:
with handler() as rh:
@@ -360,7 +360,7 @@ class TestSocks5Proxy:
assert response['auth_methods'] == [Socks5Auth.AUTH_NONE, Socks5Auth.AUTH_USER_PASS]
assert response['version'] == 5
- @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
+ @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
def test_socks5_ipv4_target(self, handler, ctx):
with ctx.socks_server(Socks5ProxyHandler) as server_address:
with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
@@ -368,7 +368,7 @@ class TestSocks5Proxy:
assert response['ipv4_address'] == '127.0.0.1'
assert response['version'] == 5
- @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
+ @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
def test_socks5_domain_target(self, handler, ctx):
with ctx.socks_server(Socks5ProxyHandler) as server_address:
with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
@@ -376,7 +376,7 @@ class TestSocks5Proxy:
assert (response['ipv4_address'] == '127.0.0.1') != (response['ipv6_address'] == '::1')
assert response['version'] == 5
- @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
+ @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
def test_socks5h_domain_target(self, handler, ctx):
with ctx.socks_server(Socks5ProxyHandler) as server_address:
with handler(proxies={'all': f'socks5h://{server_address}'}) as rh:
@@ -385,7 +385,7 @@ class TestSocks5Proxy:
assert response['domain_address'] == 'localhost'
assert response['version'] == 5
- @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
+ @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
def test_socks5h_ip_target(self, handler, ctx):
with ctx.socks_server(Socks5ProxyHandler) as server_address:
with handler(proxies={'all': f'socks5h://{server_address}'}) as rh:
@@ -394,7 +394,7 @@ class TestSocks5Proxy:
assert response['domain_address'] is None
assert response['version'] == 5
- @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
+ @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
def test_socks5_ipv6_destination(self, handler, ctx):
with ctx.socks_server(Socks5ProxyHandler) as server_address:
with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
@@ -402,7 +402,7 @@ class TestSocks5Proxy:
assert response['ipv6_address'] == '::1'
assert response['version'] == 5
- @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
+ @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
def test_ipv6_socks5_proxy(self, handler, ctx):
with ctx.socks_server(Socks5ProxyHandler, bind_ip='::1') as server_address:
with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
@@ -413,7 +413,7 @@ class TestSocks5Proxy:
# XXX: is there any feasible way of testing IPv6 source addresses?
# Same would go for non-proxy source_address test...
- @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
+ @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
def test_ipv4_client_source_address(self, handler, ctx):
with ctx.socks_server(Socks5ProxyHandler) as server_address:
source_address = f'127.0.0.{random.randint(5, 255)}'
@@ -422,7 +422,7 @@ class TestSocks5Proxy:
assert response['client_address'][0] == source_address
assert response['version'] == 5
- @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
+ @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
@pytest.mark.parametrize('reply_code', [
Socks5Reply.GENERAL_FAILURE,
Socks5Reply.CONNECTION_NOT_ALLOWED,
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 71d17ac01c..8e11646d3b 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -3968,7 +3968,7 @@ class YoutubeDL:
})) or 'none'))
write_debug(f'Proxy map: {self.proxies}')
- # write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}')
+ write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}')
for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
display_list = ['%s%s' % (
klass.__name__, '' if klass.__name__ == name else f' as {name}')
@@ -4057,6 +4057,9 @@ class YoutubeDL:
raise RequestError(
'file:// URLs are disabled by default in yt-dlp for security reasons. '
'Use --enable-file-urls to enable at your own risk.', cause=ue) from ue
+ if 'unsupported proxy type: "https"' in ue.msg.lower():
+ raise RequestError(
+ 'To use an HTTPS proxy for this request, one of the following dependencies needs to be installed: requests')
raise
except SSLError as e:
if 'UNSAFE_LEGACY_RENEGOTIATION_DISABLED' in str(e):
@@ -4099,6 +4102,8 @@ class YoutubeDL:
}),
))
director.preferences.update(preferences or [])
+ if 'prefer-legacy-http-handler' in self.params['compat_opts']:
+ director.preferences.add(lambda rh, _: 500 if rh.RH_KEY == 'Urllib' else 0)
return director
def encode(self, s):
diff --git a/yt_dlp/__pyinstaller/hook-yt_dlp.py b/yt_dlp/__pyinstaller/hook-yt_dlp.py
index 88c2b8b285..c7f2c0ceb7 100644
--- a/yt_dlp/__pyinstaller/hook-yt_dlp.py
+++ b/yt_dlp/__pyinstaller/hook-yt_dlp.py
@@ -21,7 +21,9 @@ def get_hidden_imports():
yield from ('yt_dlp.compat._legacy', 'yt_dlp.compat._deprecated')
yield from ('yt_dlp.utils._legacy', 'yt_dlp.utils._deprecated')
yield pycryptodome_module()
- yield from collect_submodules('websockets')
+ # Only `websockets` is required, others are collected just in case
+ for module in ('websockets', 'requests', 'urllib3'):
+ yield from collect_submodules(module)
# These are auto-detected, but explicitly add them just in case
yield from ('mutagen', 'brotli', 'certifi')
diff --git a/yt_dlp/dependencies/__init__.py b/yt_dlp/dependencies/__init__.py
index b56e4f5cc6..ef83739a3b 100644
--- a/yt_dlp/dependencies/__init__.py
+++ b/yt_dlp/dependencies/__init__.py
@@ -58,6 +58,15 @@ except (ImportError, SyntaxError):
# See https://github.com/yt-dlp/yt-dlp/issues/2633
websockets = None
+try:
+ import urllib3
+except ImportError:
+ urllib3 = None
+
+try:
+ import requests
+except ImportError:
+ requests = None
try:
import xattr # xattr or pyxattr
diff --git a/yt_dlp/networking/__init__.py b/yt_dlp/networking/__init__.py
index 5b1599a6dc..aa8d0eabe4 100644
--- a/yt_dlp/networking/__init__.py
+++ b/yt_dlp/networking/__init__.py
@@ -1,4 +1,6 @@
# flake8: noqa: F401
+import warnings
+
from .common import (
HEADRequest,
PUTRequest,
@@ -11,3 +13,11 @@ from .common import (
# isort: split
# TODO: all request handlers should be safely imported
from . import _urllib
+from ..utils import bug_reports_message
+
+try:
+ from . import _requests
+except ImportError:
+ pass
+except Exception as e:
+ warnings.warn(f'Failed to import "requests" request handler: {e}' + bug_reports_message())
diff --git a/yt_dlp/networking/_helper.py b/yt_dlp/networking/_helper.py
index 4c9dbf25dc..a6fa3550bd 100644
--- a/yt_dlp/networking/_helper.py
+++ b/yt_dlp/networking/_helper.py
@@ -11,7 +11,7 @@ import urllib.request
from .exceptions import RequestError, UnsupportedRequest
from ..dependencies import certifi
-from ..socks import ProxyType
+from ..socks import ProxyType, sockssocket
from ..utils import format_field, traverse_obj
if typing.TYPE_CHECKING:
@@ -224,6 +224,24 @@ def _socket_connect(ip_addr, timeout, source_address):
raise
+def create_socks_proxy_socket(dest_addr, proxy_args, proxy_ip_addr, timeout, source_address):
+    """Create a socks socket via the proxy at proxy_ip_addr and connect it to dest_addr.
+
+    @param dest_addr        Address handed to sock.connect() once the proxy is configured
+    @param proxy_args       Keyword arguments for sockssocket.setproxy()
+    @param proxy_ip_addr    5-tuple (family, type, proto, canonname, sockaddr) of the proxy;
+                            the sockaddr host/port replace 'addr'/'port' from proxy_args
+    @param timeout          Socket timeout; not applied if it is socket._GLOBAL_DEFAULT_TIMEOUT
+    @param source_address   Optional local address to bind to before connecting
+    @returns                The connected socket
+    """
+    family, sock_type, protocol, _, proxy_sockaddr = proxy_ip_addr
+    sock = sockssocket(family, sock_type, protocol)
+    try:
+        sock.setproxy(**{**proxy_args, 'addr': proxy_sockaddr[0], 'port': proxy_sockaddr[1]})
+        if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:  # noqa: E721
+            sock.settimeout(timeout)
+        if source_address:
+            sock.bind(source_address)
+        sock.connect(dest_addr)
+    except socket.error:
+        # Do not leak the socket if configuring or connecting it failed
+        sock.close()
+        raise
+    return sock
+
+
def create_connection(
address,
timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
diff --git a/yt_dlp/networking/_requests.py b/yt_dlp/networking/_requests.py
new file mode 100644
index 0000000000..27974357ae
--- /dev/null
+++ b/yt_dlp/networking/_requests.py
@@ -0,0 +1,398 @@
+import contextlib
+import functools
+import http.client
+import logging
+import re
+import socket
+import warnings
+
+from ..dependencies import brotli, requests, urllib3
+from ..utils import bug_reports_message, int_or_none, variadic
+
+if requests is None:
+ raise ImportError('requests module is not installed')
+
+if urllib3 is None:
+ raise ImportError('urllib3 module is not installed')
+
+urllib3_version = tuple(int_or_none(x, default=0) for x in urllib3.__version__.split('.'))
+
+if urllib3_version < (1, 26, 17):
+ raise ImportError('Only urllib3 >= 1.26.17 is supported')
+
+if requests.__build__ < 0x023100:
+ raise ImportError('Only requests >= 2.31.0 is supported')
+
+import requests.adapters
+import requests.utils
+import urllib3.connection
+import urllib3.exceptions
+
+from ._helper import (
+ InstanceStoreMixin,
+ add_accept_encoding_header,
+ create_connection,
+ create_socks_proxy_socket,
+ get_redirect_method,
+ make_socks_proxy_opts,
+ select_proxy,
+)
+from .common import (
+ Features,
+ RequestHandler,
+ Response,
+ register_preference,
+ register_rh,
+)
+from .exceptions import (
+ CertificateVerifyError,
+ HTTPError,
+ IncompleteRead,
+ ProxyError,
+ RequestError,
+ SSLError,
+ TransportError,
+)
+from ..socks import ProxyError as SocksProxyError
+
+# Content encodings this handler can decode
+SUPPORTED_ENCODINGS = ['gzip', 'deflate']
+
+# 'br' is only offered when the optional brotli dependency is available (not None)
+if brotli is not None:
+    SUPPORTED_ENCODINGS.append('br')
+
+"""
+Override urllib3's behavior to not convert lower-case percent-encoded characters
+to upper-case during url normalization process.
+
+RFC3986 defines that lower-case and upper-case percent-encoded hexadecimal characters are equivalent
+and normalizers should convert them to uppercase for consistency [1].
+
+However, some sites may have an incorrect implementation where they provide
+a percent-encoded url that is then compared case-sensitively.[2]
+
+While this is a very rare case, since urllib does not do this normalization step, it
+is best to avoid it in requests too for compatibility reasons.
+
+1: https://tools.ietf.org/html/rfc3986#section-2.1
+2: https://github.com/streamlink/streamlink/pull/4003
+"""
+
+
+class Urllib3PercentREOverride:
+    """Wrapper around a compiled pattern whose subn() never rewrites the input string.
+
+    subn() still reports how many substitutions the wrapped pattern would make,
+    but returns the original string instead of the substituted one.
+    Every other attribute is looked up on the wrapped pattern.
+    """
+
+    def __init__(self, r: re.Pattern):
+        self.re = r
+
+    def __getattr__(self, item):
+        # Only reached for attributes not found on this wrapper: delegate to the original re
+        return getattr(self.re, item)
+
+    def subn(self, repl, string, *args, **kwargs):
+        _, substitution_count = self.re.subn(repl, string, *args, **kwargs)
+        return string, substitution_count
+
+
+# urllib3 >= 1.25.8 uses subn:
+# https://github.com/urllib3/urllib3/commit/a2697e7c6b275f05879b60f593c5854a816489f0
+import urllib3.util.url  # noqa: E305
+
+# Wrap whichever percent-encoding pattern this urllib3 version exposes in
+# Urllib3PercentREOverride; warn if neither attribute name is present.
+if hasattr(urllib3.util.url, 'PERCENT_RE'):
+    urllib3.util.url.PERCENT_RE = Urllib3PercentREOverride(urllib3.util.url.PERCENT_RE)
+elif hasattr(urllib3.util.url, '_PERCENT_RE'):  # urllib3 >= 2.0.0
+    urllib3.util.url._PERCENT_RE = Urllib3PercentREOverride(urllib3.util.url._PERCENT_RE)
+else:
+    warnings.warn('Failed to patch PERCENT_RE in urllib3 (does the attribute exist?)' + bug_reports_message())
+
+"""
+Workaround for an issue in urllib3/util/ssl_.py: ssl_wrap_socket does not pass
+server_hostname to SSLContext.wrap_socket if server_hostname is an IP address.
+This is a problem because we set check_hostname to True in our SSLContext.
+
+Monkey-patching IS_SECURETRANSPORT forces ssl_wrap_socket to pass server_hostname regardless.
+
+This has been fixed in urllib3 2.0+.
+See: https://github.com/urllib3/urllib3/issues/517
+"""
+
+if urllib3_version < (2, 0, 0):
+    # Best-effort monkey-patch: skip it quietly if this urllib3 build lacks the attributes.
+    # NOTE: contextlib.suppress() without any exception type suppresses nothing,
+    # so the exception type has to be named explicitly.
+    with contextlib.suppress(AttributeError):
+        urllib3.util.IS_SECURETRANSPORT = urllib3.util.ssl_.IS_SECURETRANSPORT = True
+
+
+# Requests will not automatically handle no_proxy by default
+# due to buggy no_proxy handling with proxy dict [1].
+# Replace the select_proxy used by requests.adapters with our own helper from ._helper.
+# 1. https://github.com/psf/requests/issues/5000
+requests.adapters.select_proxy = select_proxy
+
+
+class RequestsResponseAdapter(Response):
+    """Response built from a requests response.
+
+    The raw stream of the requests response is used as the file object, and
+    urllib3 exceptions raised while reading are converted to our networking exceptions.
+    """
+
+    def __init__(self, res: requests.models.Response):
+        super().__init__(
+            fp=res.raw,
+            headers=res.headers,
+            url=res.url,
+            status=res.status_code,
+            reason=res.reason,
+        )
+        # Keep the original requests response around
+        self._requests_response = res
+
+    def read(self, amt: int = None):
+        """Read up to amt bytes of decoded content from the underlying urllib3 response.
+
+        Raises SSLError, IncompleteRead or TransportError in place of the urllib3 exceptions.
+        """
+        try:
+            # Bypass requests and read from the urllib3 response itself
+            return self.fp.read(amt, decode_content=True)
+
+        # The exceptions that can be raised are listed in urllib3.response.HTTPResponse.read()
+        except urllib3.exceptions.SSLError as e:
+            raise SSLError(cause=e) from e
+
+        except urllib3.exceptions.IncompleteRead as e:
+            # partial is always an integer on urllib3's IncompleteRead
+            raise IncompleteRead(partial=e.partial, expected=e.expected) from e
+
+        except urllib3.exceptions.ProtocolError as e:
+            # A ProtocolError may wrap an http.client.IncompleteRead
+            # See urllib3.response.HTTPResponse._error_catcher()
+            for candidate in (e.__context__, e.__cause__, *variadic(e.args)):
+                if isinstance(candidate, http.client.IncompleteRead):
+                    raise IncompleteRead(
+                        partial=len(candidate.partial), expected=candidate.expected) from e
+            raise TransportError(cause=e) from e
+
+        except urllib3.exceptions.HTTPError as e:
+            # Any remaining urllib3 response exception
+            raise TransportError(cause=e) from e
+
+
+class RequestsHTTPAdapter(requests.adapters.HTTPAdapter):
+    """HTTPAdapter that hands our SSL context and source address to the pool/proxy managers.
+
+    @param ssl_context          SSLContext passed to every pool manager, if given
+    @param proxy_ssl_context    SSLContext for non-socks proxies; falls back to ssl_context
+    @param source_address       Local address to bind to; passed on as (source_address, 0)
+    """
+
+    def __init__(self, ssl_context=None, proxy_ssl_context=None, source_address=None, **kwargs):
+        optional_pm_args = {
+            'ssl_context': ssl_context,
+            'source_address': source_address and (source_address, 0),
+        }
+        # Only pass on the arguments that were actually provided
+        self._pm_args = {name: value for name, value in optional_pm_args.items() if value}
+        self._proxy_ssl_context = proxy_ssl_context or ssl_context
+        super().__init__(**kwargs)
+
+    def init_poolmanager(self, *args, **kwargs):
+        return super().init_poolmanager(*args, **kwargs, **self._pm_args)
+
+    def proxy_manager_for(self, proxy, **proxy_kwargs):
+        # proxy_ssl_context is not passed on for socks proxies
+        if proxy.lower().startswith('socks') or not self._proxy_ssl_context:
+            return super().proxy_manager_for(proxy, **proxy_kwargs, **self._pm_args)
+        return super().proxy_manager_for(
+            proxy, **proxy_kwargs, **self._pm_args, proxy_ssl_context=self._proxy_ssl_context)
+
+    def cert_verify(*args, **kwargs):
+        # Intentionally a no-op: certificate verification is left to the SSLContext
+        pass
+
+
+class RequestsSession(requests.sessions.Session):
+    """
+    Session that picks the redirect method with get_redirect_method(), so that
+    redirect method handling is unified with our urllib redirect handler.
+    """
+
+    def rebuild_method(self, prepared_request, response):
+        redirect_method = get_redirect_method(prepared_request.method, response.status_code)
+        keeps_method = redirect_method == prepared_request.method
+        prepared_request.method = redirect_method
+
+        # HACK: requests removes headers/body on redirect unless code was a 307/308.
+        # Pretend the response was a 308 and remember the real code for rebuild_auth.
+        if keeps_method:
+            response._real_status_code = response.status_code
+            response.status_code = 308
+
+    def rebuild_auth(self, prepared_request, response):
+        # HACK: undo status code change from rebuild_method, if applicable.
+        # rebuild_auth runs after requests would remove headers/body based on status code
+        try:
+            response.status_code = response._real_status_code
+        except AttributeError:
+            pass  # rebuild_method did not alter the status code
+        else:
+            del response._real_status_code
+        return super().rebuild_auth(prepared_request, response)
+
+
+class Urllib3LoggingFilter(logging.Filter):
+    """Drop the per-request log records, since HTTPConnection prints those already"""
+
+    # Unformatted message of the HTTP request log record that is ignored
+    _HTTP_REQUEST_MSG = '%s://%s:%s "%s %s %s" %s %s'
+
+    def filter(self, record):
+        return record.msg != self._HTTP_REQUEST_MSG
+
+
+class Urllib3LoggingHandler(logging.Handler):
+    """Redirect urllib3 logs to our logger
+
+    Records at ERROR level or above go to logger.error(), all others to logger.stdout().
+    """
+
+    def __init__(self, logger, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._logger = logger
+
+    def emit(self, record):
+        try:
+            msg = self.format(record)
+            is_error = record.levelno >= logging.ERROR
+            forward = self._logger.error if is_error else self._logger.stdout
+            forward(msg)
+        except Exception:
+            # Same convention as the stdlib handlers: never let logging raise
+            self.handleError(record)
+
+
+@register_rh
+class RequestsRH(RequestHandler, InstanceStoreMixin):
+
+    """Requests RequestHandler
+    https://github.com/psf/requests
+
+    Requests are sent through a RequestsSession obtained from InstanceStoreMixin
+    for the cookiejar in use. urllib3 log messages are forwarded to our logger
+    for as long as the handler is open.
+    """
+    _SUPPORTED_URL_SCHEMES = ('http', 'https')
+    _SUPPORTED_ENCODINGS = tuple(SUPPORTED_ENCODINGS)
+    _SUPPORTED_PROXY_SCHEMES = ('http', 'https', 'socks4', 'socks4a', 'socks5', 'socks5h')
+    _SUPPORTED_FEATURES = (Features.NO_PROXY, Features.ALL_PROXY)
+    RH_NAME = 'requests'
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        # Forward urllib3 debug messages to our logger.
+        # The handler is kept on the instance so that close() can detach it again;
+        # otherwise every RequestsRH instance would leave another handler
+        # (holding a reference to its logger) on the global urllib3 logger.
+        logger = logging.getLogger('urllib3')
+        self.__logging_handler = Urllib3LoggingHandler(logger=self._logger)
+        self.__logging_handler.setFormatter(logging.Formatter('requests: %(message)s'))
+        self.__logging_handler.addFilter(Urllib3LoggingFilter())
+        logger.addHandler(self.__logging_handler)
+        logger.setLevel(logging.WARNING)
+
+        if self.verbose:
+            # Setting this globally is not ideal, but is easier than hacking with urllib3.
+            # It could technically be problematic for scripts embedding yt-dlp.
+            # However, it is unlikely debug traffic is used in that context in a way this will cause problems.
+            urllib3.connection.HTTPConnection.debuglevel = 1
+            logger.setLevel(logging.DEBUG)
+        # this is expected if we are using --no-check-certificate
+        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+
+    def close(self):
+        """Clear all stored sessions and stop forwarding urllib3 logs to our logger"""
+        self._clear_instances()
+        # removeHandler() is a no-op if the handler was already removed, so close() may be repeated
+        logging.getLogger('urllib3').removeHandler(self.__logging_handler)
+
+    def _check_extensions(self, extensions):
+        super()._check_extensions(extensions)
+        # These extensions are consumed in _send()
+        extensions.pop('cookiejar', None)
+        extensions.pop('timeout', None)
+
+    def _create_instance(self, cookiejar):
+        """Build a RequestsSession that uses our adapter, default headers and the given cookiejar"""
+        session = RequestsSession()
+        http_adapter = RequestsHTTPAdapter(
+            ssl_context=self._make_sslcontext(),
+            source_address=self.source_address,
+            max_retries=urllib3.util.retry.Retry(False),
+        )
+        # Drop the default adapters so only ours is mounted
+        session.adapters.clear()
+        session.headers = requests.models.CaseInsensitiveDict({'Connection': 'keep-alive'})
+        session.mount('https://', http_adapter)
+        session.mount('http://', http_adapter)
+        session.cookies = cookiejar
+        session.trust_env = False  # no need, we already load proxies from env
+        return session
+
+    def _send(self, request):
+        """Send the request with requests and return a RequestsResponseAdapter.
+
+        requests/urllib3 exceptions are converted to our networking exceptions.
+        HTTPError is raised for any status outside 2xx, with redirect_loop set
+        when requests gave up because of too many redirects.
+        """
+
+        headers = self._merge_headers(request.headers)
+        add_accept_encoding_header(headers, SUPPORTED_ENCODINGS)
+
+        max_redirects_exceeded = False
+
+        # Per-request extensions take precedence over the handler-wide settings
+        session = self._get_instance(
+            cookiejar=request.extensions.get('cookiejar') or self.cookiejar)
+
+        try:
+            requests_res = session.request(
+                method=request.method,
+                url=request.url,
+                data=request.data,
+                headers=headers,
+                timeout=float(request.extensions.get('timeout') or self.timeout),
+                proxies=request.proxies or self.proxies,
+                allow_redirects=True,
+                stream=True
+            )
+
+        except requests.exceptions.TooManyRedirects as e:
+            # Continue with the last response; it is reported as an HTTPError below
+            max_redirects_exceeded = True
+            requests_res = e.response
+
+        except requests.exceptions.SSLError as e:
+            if 'CERTIFICATE_VERIFY_FAILED' in str(e):
+                raise CertificateVerifyError(cause=e) from e
+            raise SSLError(cause=e) from e
+
+        except requests.exceptions.ProxyError as e:
+            raise ProxyError(cause=e) from e
+
+        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as e:
+            raise TransportError(cause=e) from e
+
+        except urllib3.exceptions.HTTPError as e:
+            # Catch any urllib3 exceptions that may leak through
+            raise TransportError(cause=e) from e
+
+        except requests.exceptions.RequestException as e:
+            # Miscellaneous Requests exceptions. May not necessarily be network related e.g. InvalidURL
+            raise RequestError(cause=e) from e
+
+        res = RequestsResponseAdapter(requests_res)
+
+        if not 200 <= res.status < 300:
+            raise HTTPError(res, redirect_loop=max_redirects_exceeded)
+
+        return res
+
+
+@register_preference(RequestsRH)
+def requests_preference(rh, request):
+    """Preference registered for RequestsRH: a constant 100, whatever the handler or request"""
+    return 100
+
+
+# Use our socks proxy implementation with requests to avoid an extra dependency.
+class SocksHTTPConnection(urllib3.connection.HTTPConnection):
+    """urllib3 HTTPConnection that opens its socket through a socks proxy"""
+
+    def __init__(self, _socks_options, *args, **kwargs):  # must use _socks_options to pass PoolKey checks
+        self._proxy_args = _socks_options
+        super().__init__(*args, **kwargs)
+
+    def _new_conn(self):
+        """Connect to the proxy and return a socks socket for (self.host, self.port).
+
+        Timeouts, socks proxy errors and other OS-level errors are re-raised as
+        urllib3's ConnectTimeoutError, ProxyError and NewConnectionError respectively.
+        """
+        proxy_address = (self._proxy_args['addr'], self._proxy_args['port'])
+        # Destination and proxy options are bound up front; create_connection supplies the rest
+        make_socks_socket = functools.partial(
+            create_socks_proxy_socket, (self.host, self.port), self._proxy_args)
+        try:
+            return create_connection(
+                address=proxy_address,
+                timeout=self.timeout,
+                source_address=self.source_address,
+                _create_socket_func=make_socks_socket)
+        except (socket.timeout, TimeoutError) as e:
+            raise urllib3.exceptions.ConnectTimeoutError(
+                self, f'Connection to {self.host} timed out. (connect timeout={self.timeout})') from e
+        except SocksProxyError as e:
+            raise urllib3.exceptions.ProxyError(str(e), e) from e
+        except OSError as e:  # socket.error is an alias of OSError
+            raise urllib3.exceptions.NewConnectionError(
+                self, f'Failed to establish a new connection: {e}') from e
+
+
+class SocksHTTPSConnection(SocksHTTPConnection, urllib3.connection.HTTPSConnection):
+    """HTTPS counterpart: SocksHTTPConnection combined with urllib3's HTTPSConnection"""
+    pass
+
+
+class SocksHTTPConnectionPool(urllib3.HTTPConnectionPool):
+    # Connection class for this pool: the socks-based HTTP connection
+    ConnectionCls = SocksHTTPConnection
+
+
+class SocksHTTPSConnectionPool(urllib3.HTTPSConnectionPool):
+    # Connection class for this pool: the socks-based HTTPS connection
+    ConnectionCls = SocksHTTPSConnection
+
+
+class SocksProxyManager(urllib3.PoolManager):
+    """PoolManager whose http and https pools connect through a socks proxy.
+
+    The options parsed from socks_proxy are passed to the pools as '_socks_options'.
+    username and password are accepted but not used here.
+    """
+
+    def __init__(self, socks_proxy, username=None, password=None, num_pools=10, headers=None, **connection_pool_kw):
+        pool_kwargs = {**connection_pool_kw, '_socks_options': make_socks_proxy_opts(socks_proxy)}
+        super().__init__(num_pools, headers, **pool_kwargs)
+        self.pool_classes_by_scheme = {
+            'http': SocksHTTPConnectionPool,
+            'https': SocksHTTPSConnectionPool,
+        }
+
+
+# Point the SOCKSProxyManager name in requests.adapters at our own socks implementation
+requests.adapters.SOCKSProxyManager = SocksProxyManager
diff --git a/yt_dlp/networking/_urllib.py b/yt_dlp/networking/_urllib.py
index 9e2bf33e45..68bab2b087 100644
--- a/yt_dlp/networking/_urllib.py
+++ b/yt_dlp/networking/_urllib.py
@@ -3,7 +3,6 @@ from __future__ import annotations
import functools
import http.client
import io
-import socket
import ssl
import urllib.error
import urllib.parse
@@ -24,6 +23,7 @@ from ._helper import (
InstanceStoreMixin,
add_accept_encoding_header,
create_connection,
+ create_socks_proxy_socket,
get_redirect_method,
make_socks_proxy_opts,
select_proxy,
@@ -40,7 +40,6 @@ from .exceptions import (
)
from ..dependencies import brotli
from ..socks import ProxyError as SocksProxyError
-from ..socks import sockssocket
from ..utils import update_url_query
from ..utils.networking import normalize_url
@@ -190,25 +189,12 @@ def make_socks_conn_class(base_class, socks_proxy):
_create_connection = create_connection
def connect(self):
- def sock_socket_connect(ip_addr, timeout, source_address):
- af, socktype, proto, canonname, sa = ip_addr
- sock = sockssocket(af, socktype, proto)
- try:
- connect_proxy_args = proxy_args.copy()
- connect_proxy_args.update({'addr': sa[0], 'port': sa[1]})
- sock.setproxy(**connect_proxy_args)
- if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT: # noqa: E721
- sock.settimeout(timeout)
- if source_address:
- sock.bind(source_address)
- sock.connect((self.host, self.port))
- return sock
- except socket.error:
- sock.close()
- raise
self.sock = create_connection(
- (proxy_args['addr'], proxy_args['port']), timeout=self.timeout,
- source_address=self.source_address, _create_socket_func=sock_socket_connect)
+ (proxy_args['addr'], proxy_args['port']),
+ timeout=self.timeout,
+ source_address=self.source_address,
+ _create_socket_func=functools.partial(
+ create_socks_proxy_socket, (self.host, self.port), proxy_args))
if isinstance(self, http.client.HTTPSConnection):
self.sock = self._context.wrap_socket(self.sock, server_hostname=self.host)
diff --git a/yt_dlp/options.py b/yt_dlp/options.py
index 85a6402a6d..4254974fcb 100644
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -471,11 +471,12 @@ def create_parser():
'no-attach-info-json', 'embed-thumbnail-atomicparsley', 'no-external-downloader-progress',
'embed-metadata', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi',
'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-youtube-prefer-utc-upload-date',
+ 'prefer-legacy-http-handler'
}, 'aliases': {
'youtube-dl': ['all', '-multistreams', '-playlist-match-filter'],
'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat', '-playlist-match-filter'],
'2021': ['2022', 'no-certifi', 'filename-sanitization', 'no-youtube-prefer-utc-upload-date'],
- '2022': ['no-external-downloader-progress', 'playlist-match-filter'],
+ '2022': ['no-external-downloader-progress', 'playlist-match-filter', 'prefer-legacy-http-handler'],
}
}, help=(
'Options that can help keep compatibility with youtube-dl or youtube-dlc '