[networking] Rewrite architecture (#2861)

The new networking interface consists of a `RequestDirector` that directs
each `Request` to the appropriate `RequestHandler` and returns the
`Response` or raises a `RequestError`. The handlers define adapters to
transform their internal Request/Response/Errors to our interfaces.

User-facing changes:
- Fix issues with per-request proxies on redirects for urllib
- Support for `ALL_PROXY` environment variable for proxy setting
- Support for `socks5h` proxy
   - Closes https://github.com/yt-dlp/yt-dlp/issues/6325, https://github.com/ytdl-org/youtube-dl/issues/22618, https://github.com/ytdl-org/youtube-dl/pull/28093
- Raise error when using `https` proxy instead of silently converting it to `http`

Authored by: coletdjnz
This commit is contained in:
coletdjnz
2023-07-15 15:55:23 +05:30
committed by pukkandan
parent c365dba843
commit 227bf1a33b
16 changed files with 2586 additions and 474 deletions

View File

@@ -10,10 +10,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import collections
import hashlib
import http.client
import json
import socket
import urllib.error
from test.helper import (
assertGreaterEqual,
@@ -29,6 +26,7 @@ from test.helper import (
import yt_dlp.YoutubeDL # isort: split
from yt_dlp.extractor import get_info_extractor
from yt_dlp.networking.exceptions import HTTPError, TransportError
from yt_dlp.utils import (
DownloadError,
ExtractorError,
@@ -162,8 +160,7 @@ def generator(test_case, tname):
force_generic_extractor=params.get('force_generic_extractor', False))
except (DownloadError, ExtractorError) as err:
# Check if the exception is not a network related one
if (err.exc_info[0] not in (urllib.error.URLError, socket.timeout, UnavailableVideoError, http.client.BadStatusLine)
or (err.exc_info[0] == urllib.error.HTTPError and err.exc_info[1].code == 503)):
if not isinstance(err.exc_info[1], (TransportError, UnavailableVideoError)) or (isinstance(err.exc_info[1], HTTPError) and err.exc_info[1].code == 503):
err.msg = f'{getattr(err, "msg", err)} ({tname})'
raise
@@ -249,7 +246,7 @@ def generator(test_case, tname):
# extractor returns full results even with extract_flat
res_tcs = [{'info_dict': e} for e in res_dict['entries']]
try_rm_tcs_files(res_tcs)
ydl.close()
return test_template

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,239 @@
#!/usr/bin/env python3
# Allow direct execution
import os
import sys
import pytest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import io
import platform
import random
import ssl
import urllib.error
from yt_dlp.cookies import YoutubeDLCookieJar
from yt_dlp.dependencies import certifi
from yt_dlp.networking import Response
from yt_dlp.networking._helper import (
InstanceStoreMixin,
add_accept_encoding_header,
get_redirect_method,
make_socks_proxy_opts,
select_proxy,
ssl_load_certs,
)
from yt_dlp.networking.exceptions import (
HTTPError,
IncompleteRead,
_CompatHTTPError,
)
from yt_dlp.socks import ProxyType
from yt_dlp.utils.networking import HTTPHeaderDict
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
class TestNetworkingUtils:
    """Tests for the stand-alone helper functions imported from ``yt_dlp.networking._helper``."""

    def test_select_proxy(self):
        """A scheme-specific proxy is preferred over ``all``; hosts listed under ``no`` get no proxy."""
        proxy_map = {
            'all': 'socks5://example.com',
            'http': 'http://example.com:1080',
            'no': 'bypass.example.com,yt-dl.org',
        }

        # No 'https' entry exists, so the 'all' proxy is expected
        assert select_proxy('https://example.com', proxy_map) == proxy_map['all']
        assert select_proxy('http://example.com', proxy_map) == proxy_map['http']

        # Hosts in the 'no' list are expected to bypass every proxy
        assert select_proxy('http://bypass.example.com', proxy_map) is None
        assert select_proxy('https://yt-dl.org', proxy_map) is None

    @pytest.mark.parametrize('socks_proxy,expected', [
        (
            'socks5h://example.com',
            {
                'proxytype': ProxyType.SOCKS5,
                'addr': 'example.com',
                'port': 1080,
                'rdns': True,
                'username': None,
                'password': None,
            },
        ),
        (
            'socks5://user:@example.com:5555',
            {
                'proxytype': ProxyType.SOCKS5,
                'addr': 'example.com',
                'port': 5555,
                'rdns': False,
                'username': 'user',
                'password': '',
            },
        ),
        (
            'socks4://u%40ser:pa%20ss@127.0.0.1:1080',
            {
                'proxytype': ProxyType.SOCKS4,
                'addr': '127.0.0.1',
                'port': 1080,
                'rdns': False,
                'username': 'u@ser',
                'password': 'pa ss',
            },
        ),
        (
            'socks4a://:pa%20ss@127.0.0.1',
            {
                'proxytype': ProxyType.SOCKS4A,
                'addr': '127.0.0.1',
                'port': 1080,
                'rdns': True,
                'username': '',
                'password': 'pa ss',
            },
        ),
    ])
    def test_make_socks_proxy_opts(self, socks_proxy, expected):
        """Each SOCKS proxy URL is expected to parse into the given options dict."""
        parsed_opts = make_socks_proxy_opts(socks_proxy)
        assert parsed_opts == expected

    def test_make_socks_proxy_unknown(self):
        """A bare ``socks://`` scheme is expected to be rejected with ``ValueError``."""
        with pytest.raises(ValueError, match='Unknown SOCKS proxy version: socks'):
            make_socks_proxy_opts('socks://127.0.0.1')

    @pytest.mark.skipif(not certifi, reason='certifi is not installed')
    def test_load_certifi(self):
        """``ssl_load_certs`` with ``use_certifi=True`` should load the same CAs as certifi's bundle."""
        helper_ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
        ssl_load_certs(helper_ctx, use_certifi=True)

        reference_ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
        reference_ctx.load_verify_locations(cafile=certifi.where())

        assert helper_ctx.get_ca_certs() == reference_ctx.get_ca_certs()

        # Test load normal certs
        # XXX: could there be a case where system certs are the same as certifi?
        system_ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
        ssl_load_certs(system_ctx, use_certifi=False)
        assert system_ctx.get_ca_certs() != helper_ctx.get_ca_certs()

    @pytest.mark.parametrize('method,status,expected', [
        ('GET', 303, 'GET'),
        ('HEAD', 303, 'HEAD'),
        ('PUT', 303, 'GET'),
        ('POST', 301, 'GET'),
        ('HEAD', 301, 'HEAD'),
        ('POST', 302, 'GET'),
        ('HEAD', 302, 'HEAD'),
        ('PUT', 302, 'PUT'),
        ('POST', 308, 'POST'),
        ('POST', 307, 'POST'),
        ('HEAD', 308, 'HEAD'),
        ('HEAD', 307, 'HEAD'),
    ])
    def test_get_redirect_method(self, method, status, expected):
        """The method used after a redirect is expected to match the table above."""
        redirected_method = get_redirect_method(method, status)
        assert redirected_method == expected

    @pytest.mark.parametrize('headers,supported_encodings,expected', [
        ({'Accept-Encoding': 'br'}, ['gzip', 'br'], {'Accept-Encoding': 'br'}),
        ({}, ['gzip', 'br'], {'Accept-Encoding': 'gzip, br'}),
        ({'Content-type': 'application/json'}, [], {'Content-type': 'application/json', 'Accept-Encoding': 'identity'}),
    ])
    def test_add_accept_encoding_header(self, headers, supported_encodings, expected):
        """An existing ``Accept-Encoding`` is kept; otherwise one is filled in (``identity`` when nothing is supported)."""
        header_dict = HTTPHeaderDict(headers)
        add_accept_encoding_header(header_dict, supported_encodings)
        assert header_dict == HTTPHeaderDict(expected)
class TestInstanceStoreMixin:
    """Tests for the instance store exposed by ``InstanceStoreMixin``.

    The assertions check that ``_get_instance`` hands back the same stored
    instance for equal keyword arguments, a different one for differing
    keyword arguments, and a fresh one after ``_clear_instances``.
    """

    class FakeInstanceStoreMixin(InstanceStoreMixin):
        """Minimal concrete mixin whose "instances" are random integers.

        Two separately created instances are therefore (almost certainly)
        unequal, which lets the test tell a reused instance from a new one.
        """

        def _create_instance(self, **kwargs):
            return random.randint(0, 1000000)

        def _close_instance(self, instance):
            pass

    def test_mixin(self):
        """Exercise instance reuse, key sensitivity, argument order and clearing."""
        mixin = self.FakeInstanceStoreMixin()

        # Nested containers: equal kwargs -> same instance, any difference -> new instance
        assert mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'d', 4}}) == mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'d', 4}})
        assert mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'e', 4}}) != mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'d', 4}})
        # The closing parenthesis used to sit at the very end of this line, so the
        # `!=` was evaluated inside the `d=` argument and the assertion only checked
        # the truthiness of a single random integer. Compare the two instances instead.
        assert mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'d', 4}}) != mixin._get_instance(d={'a': 1, 'b': 2, 'g': {'d', 4}})

        assert mixin._get_instance(d={'a': 1}, e=[1, 2, 3]) == mixin._get_instance(d={'a': 1}, e=[1, 2, 3])
        assert mixin._get_instance(d={'a': 1}, e=[1, 2, 3]) != mixin._get_instance(d={'a': 1}, e=[1, 2, 3, 4])

        # The same cookiejar object maps to the same instance; a new cookiejar does not
        cookiejar = YoutubeDLCookieJar()
        assert mixin._get_instance(b=[1, 2], c=cookiejar) == mixin._get_instance(b=[1, 2], c=cookiejar)
        assert mixin._get_instance(b=[1, 2], c=cookiejar) != mixin._get_instance(b=[1, 2], c=YoutubeDLCookieJar())

        # Different order
        assert mixin._get_instance(c=cookiejar, b=[1, 2]) == mixin._get_instance(b=[1, 2], c=cookiejar)

        # Clearing the store forces a new instance for previously seen kwargs
        m = mixin._get_instance(t=1234)
        assert mixin._get_instance(t=1234) == m
        mixin._clear_instances()
        assert mixin._get_instance(t=1234) != m
class TestNetworkingExceptions:
    """Tests for ``HTTPError``, ``_CompatHTTPError`` and ``IncompleteRead``."""

    @staticmethod
    def create_response(status):
        """Build a ``Response`` with body ``b'test'``, a single header and the given status."""
        return Response(
            fp=io.BytesIO(b'test'),
            url='http://example.com',
            headers={'tesT': 'test'},
            status=status)

    @pytest.mark.parametrize('http_error_class', [HTTPError, lambda r: _CompatHTTPError(HTTPError(r))])
    def test_http_error(self, http_error_class):
        """Check status, message, reason, response and repr for both the plain and the compat-wrapped error."""
        resp = self.create_response(403)
        err = http_error_class(resp)

        assert err.status == 403
        assert str(err) == err.msg == 'HTTP Error 403: Forbidden'
        assert err.reason == resp.reason
        assert err.response is resp

        # The body must still be readable through the error's response
        body = err.response.read()
        assert body == b'test'
        assert repr(err) == '<HTTPError 403: Forbidden>'

    @pytest.mark.parametrize('http_error_class', [HTTPError, lambda *args, **kwargs: _CompatHTTPError(HTTPError(*args, **kwargs))])
    def test_redirect_http_error(self, http_error_class):
        """``redirect_loop=True`` should add a note to the message while leaving ``reason`` untouched."""
        resp = self.create_response(301)
        err = http_error_class(resp, redirect_loop=True)

        assert str(err) == err.msg == 'HTTP Error 301: Moved Permanently (redirect loop detected)'
        assert err.reason == 'Moved Permanently'

    def test_compat_http_error(self):
        """The compat wrapper should also behave like ``urllib.error.HTTPError``."""
        resp = self.create_response(403)
        err = _CompatHTTPError(HTTPError(resp))

        assert isinstance(err, HTTPError)
        assert isinstance(err, urllib.error.HTTPError)

        # Status code accessors
        assert err.code == 403
        assert err.getcode() == 403

        # Header accessors all refer to the response's headers object
        assert err.hdrs is err.response.headers
        assert err.info() is err.response.headers
        assert err.headers is err.response.headers

        # URL accessors
        assert err.filename == err.response.url
        assert err.url == err.response.url
        assert err.geturl() == err.response.url

        # Passthrough file operations
        assert err.read() == b'test'
        assert not err.closed

        # Technically Response operations are also passed through, which should not be used.
        assert err.get_header('test') == 'test'

    @pytest.mark.skipif(
        platform.python_implementation() == 'PyPy', reason='garbage collector works differently in pypy')
    def test_compat_http_error_autoclose(self):
        """A discarded compat error must leave the response open."""
        # Compat HTTPError should not autoclose response
        resp = self.create_response(403)
        # Deliberately not bound to a name: the wrapper must be discarded right away
        _CompatHTTPError(HTTPError(resp))
        assert not resp.closed

    def test_incomplete_read_error(self):
        """Check message, repr and attributes with and without an expected byte count."""
        with_expected = IncompleteRead(b'test', 3, cause='test')
        assert isinstance(with_expected, IncompleteRead)
        assert repr(with_expected) == '<IncompleteRead: 4 bytes read, 3 more expected>'
        assert str(with_expected) == with_expected.msg == '4 bytes read, 3 more expected'
        assert with_expected.partial == b'test'
        assert with_expected.expected == 3
        assert with_expected.cause == 'test'

        partial_only = IncompleteRead(b'aaa')
        assert repr(partial_only) == '<IncompleteRead: 3 bytes read>'
        assert str(partial_only) == '3 bytes read'

View File

@@ -51,6 +51,7 @@ from yt_dlp.utils import (
escape_url,
expand_path,
extract_attributes,
extract_basic_auth,
find_xpath_attr,
fix_xml_ampersands,
float_or_none,
@@ -103,7 +104,6 @@ from yt_dlp.utils import (
sanitize_filename,
sanitize_path,
sanitize_url,
sanitized_Request,
shell_quote,
smuggle_url,
str_or_none,
@@ -132,6 +132,7 @@ from yt_dlp.utils import (
xpath_text,
xpath_with_ns,
)
from yt_dlp.utils.networking import HTTPHeaderDict
class TestUtil(unittest.TestCase):
@@ -2315,14 +2316,43 @@ Line 1
self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 'group')), ['0123', '3'],
msg='function on a `re.Match` should give group name as well')
def test_http_header_dict(self):
    """Check key/value normalisation, update, copy, clear and precedence of ``HTTPHeaderDict``."""
    hdrs = HTTPHeaderDict()

    # Keys are title-cased and values stringified on assignment
    hdrs['ytdl-test'] = 1
    self.assertEqual(list(hdrs.items()), [('Ytdl-Test', '1')])
    hdrs['Ytdl-test'] = '2'
    self.assertEqual(list(hdrs.items()), [('Ytdl-Test', '2')])
    self.assertIn('ytDl-Test', hdrs)

    # String forms mirror those of a plain dict
    as_plain_dict_text = str(dict(hdrs))
    self.assertEqual(str(hdrs), as_plain_dict_text)
    self.assertEqual(repr(hdrs), as_plain_dict_text)

    hdrs.update({'X-dlp': 'data'})
    self.assertEqual(set(hdrs.items()), {('Ytdl-Test', '2'), ('X-Dlp', 'data')})
    self.assertEqual(dict(hdrs), {'Ytdl-Test': '2', 'X-Dlp': 'data'})
    self.assertEqual(len(hdrs), 2)
    self.assertEqual(hdrs.copy(), hdrs)

    # Keyword arguments override the positional mapping
    merged = HTTPHeaderDict({'X-dlp': 'data3'}, **hdrs, **{'X-dlp': 'data2'})
    self.assertEqual(set(merged.items()), {('Ytdl-Test', '2'), ('X-Dlp', 'data2')})
    self.assertEqual(len(merged), 2)
    merged.clear()
    self.assertEqual(len(merged), 0)

    # ensure we prefer latter headers
    latter_wins = HTTPHeaderDict({'Ytdl-TeSt': 1}, {'Ytdl-test': 2})
    self.assertEqual(set(latter_wins.items()), {('Ytdl-Test', '2')})
    del latter_wins['ytdl-tesT']
    self.assertEqual(dict(latter_wins), {})

    with_semicolon = HTTPHeaderDict({'ytdl-test': 'data;'})
    self.assertEqual(set(with_semicolon.items()), {('Ytdl-Test', 'data;')})
def test_extract_basic_auth(self):
auth_header = lambda url: sanitized_Request(url).get_header('Authorization')
self.assertFalse(auth_header('http://foo.bar'))
self.assertFalse(auth_header('http://:foo.bar'))
self.assertEqual(auth_header('http://@foo.bar'), 'Basic Og==')
self.assertEqual(auth_header('http://:pass@foo.bar'), 'Basic OnBhc3M=')
self.assertEqual(auth_header('http://user:@foo.bar'), 'Basic dXNlcjo=')
self.assertEqual(auth_header('http://user:pass@foo.bar'), 'Basic dXNlcjpwYXNz')
assert extract_basic_auth('http://:foo.bar') == ('http://:foo.bar', None)
assert extract_basic_auth('http://foo.bar') == ('http://foo.bar', None)
assert extract_basic_auth('http://@foo.bar') == ('http://foo.bar', 'Basic Og==')
assert extract_basic_auth('http://:pass@foo.bar') == ('http://foo.bar', 'Basic OnBhc3M=')
assert extract_basic_auth('http://user:@foo.bar') == ('http://foo.bar', 'Basic dXNlcjo=')
assert extract_basic_auth('http://user:pass@foo.bar') == ('http://foo.bar', 'Basic dXNlcjpwYXNz')
if __name__ == '__main__':