Skip to content

Commit

Permalink
Merge branch 'yt-dlp:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
observeroftime01 authored Jan 22, 2024
2 parents a93b679 + f0e8bc7 commit ca0db11
Show file tree
Hide file tree
Showing 16 changed files with 626 additions and 231 deletions.
25 changes: 16 additions & 9 deletions test/test_networking.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,12 @@ def do_GET(self):
self.send_header('Location', '/a/b/./../../headers')
self.send_header('Content-Length', '0')
self.end_headers()
elif self.path == '/redirect_dotsegments_absolute':
self.send_response(301)
# redirect to /headers but with dot segments before - absolute url
self.send_header('Location', f'http://127.0.0.1:{http_server_port(self.server)}/a/b/./../../headers')
self.send_header('Content-Length', '0')
self.end_headers()
elif self.path.startswith('/redirect_'):
self._redirect()
elif self.path.startswith('/method'):
Expand Down Expand Up @@ -345,16 +351,17 @@ def test_percent_encode(self, handler):
res.close()

@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_remove_dot_segments(self, handler):
with handler() as rh:
@pytest.mark.parametrize('path', [
'/a/b/./../../headers',
'/redirect_dotsegments',
# https://github.com/yt-dlp/yt-dlp/issues/9020
'/redirect_dotsegments_absolute',
])
def test_remove_dot_segments(self, handler, path):
with handler(verbose=True) as rh:
# This isn't a comprehensive test,
# but it should be enough to check whether the handler is removing dot segments
res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/a/b/./../../headers'))
assert res.status == 200
assert res.url == f'http://127.0.0.1:{self.http_port}/headers'
res.close()

res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_dotsegments'))
# but it should be enough to check whether the handler is removing dot segments in required scenarios
res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}{path}'))
assert res.status == 200
assert res.url == f'http://127.0.0.1:{self.http_port}/headers'
res.close()
Expand Down
82 changes: 4 additions & 78 deletions test/test_networking_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,9 @@

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import contextlib
import io
import platform
import random
import ssl
import urllib.error
import warnings

from yt_dlp.cookies import YoutubeDLCookieJar
from yt_dlp.dependencies import certifi
Expand All @@ -30,7 +26,6 @@
from yt_dlp.networking.exceptions import (
HTTPError,
IncompleteRead,
_CompatHTTPError,
)
from yt_dlp.socks import ProxyType
from yt_dlp.utils.networking import HTTPHeaderDict
Expand Down Expand Up @@ -179,11 +174,10 @@ class TestNetworkingExceptions:
def create_response(status):
return Response(fp=io.BytesIO(b'test'), url='http://example.com', headers={'tesT': 'test'}, status=status)

@pytest.mark.parametrize('http_error_class', [HTTPError, lambda r: _CompatHTTPError(HTTPError(r))])
def test_http_error(self, http_error_class):
def test_http_error(self):

response = self.create_response(403)
error = http_error_class(response)
error = HTTPError(response)

assert error.status == 403
assert str(error) == error.msg == 'HTTP Error 403: Forbidden'
Expand All @@ -194,80 +188,12 @@ def test_http_error(self, http_error_class):
assert data == b'test'
assert repr(error) == '<HTTPError 403: Forbidden>'

@pytest.mark.parametrize('http_error_class', [HTTPError, lambda *args, **kwargs: _CompatHTTPError(HTTPError(*args, **kwargs))])
def test_redirect_http_error(self, http_error_class):
def test_redirect_http_error(self):
response = self.create_response(301)
error = http_error_class(response, redirect_loop=True)
error = HTTPError(response, redirect_loop=True)
assert str(error) == error.msg == 'HTTP Error 301: Moved Permanently (redirect loop detected)'
assert error.reason == 'Moved Permanently'

def test_compat_http_error(self):
response = self.create_response(403)
error = _CompatHTTPError(HTTPError(response))
assert isinstance(error, HTTPError)
assert isinstance(error, urllib.error.HTTPError)

@contextlib.contextmanager
def raises_deprecation_warning():
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter('always')
yield

if len(w) == 0:
pytest.fail('Did not raise DeprecationWarning')
if len(w) > 1:
pytest.fail(f'Raised multiple warnings: {w}')

if not issubclass(w[-1].category, DeprecationWarning):
pytest.fail(f'Expected DeprecationWarning, got {w[-1].category}')
w.clear()

with raises_deprecation_warning():
assert error.code == 403

with raises_deprecation_warning():
assert error.getcode() == 403

with raises_deprecation_warning():
assert error.hdrs is error.response.headers

with raises_deprecation_warning():
assert error.info() is error.response.headers

with raises_deprecation_warning():
assert error.headers is error.response.headers

with raises_deprecation_warning():
assert error.filename == error.response.url

with raises_deprecation_warning():
assert error.url == error.response.url

with raises_deprecation_warning():
assert error.geturl() == error.response.url

# Passthrough file operations
with raises_deprecation_warning():
assert error.read() == b'test'

with raises_deprecation_warning():
assert not error.closed

with raises_deprecation_warning():
# Technically Response operations are also passed through, which should not be used.
assert error.get_header('test') == 'test'

# Should not raise a warning
error.close()

@pytest.mark.skipif(
platform.python_implementation() == 'PyPy', reason='garbage collector works differently in pypy')
def test_compat_http_error_autoclose(self):
# Compat HTTPError should not autoclose response
response = self.create_response(403)
_CompatHTTPError(HTTPError(response))
assert not response.closed

def test_incomplete_read_error(self):
error = IncompleteRead(4, 3, cause='test')
assert isinstance(error, IncompleteRead)
Expand Down
3 changes: 0 additions & 3 deletions yt_dlp/YoutubeDL.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
NoSupportingHandlers,
RequestError,
SSLError,
_CompatHTTPError,
network_exceptions,
)
from .plugins import directories as plugin_directories
Expand Down Expand Up @@ -4112,8 +4111,6 @@ def urlopen(self, req):
'SSLV3_ALERT_HANDSHAKE_FAILURE: The server may not support the current cipher list. '
'Try using --legacy-server-connect', cause=e) from e
raise
except HTTPError as e: # TODO: Remove in a future release
raise _CompatHTTPError(e) from e

def build_request_director(self, handlers, preferences=None):
logger = _YDLLogger(self)
Expand Down
4 changes: 2 additions & 2 deletions yt_dlp/compat/_legacy.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
from ..dependencies import brotli as compat_brotli # noqa: F401
from ..dependencies import websockets as compat_websockets # noqa: F401
from ..dependencies.Cryptodome import AES as compat_pycrypto_AES # noqa: F401
from ..networking.exceptions import HTTPError as compat_HTTPError # noqa: F401

passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 'windows_enable_vt_mode'))

Expand Down Expand Up @@ -70,7 +71,6 @@ def compat_setenv(key, value, env=os.environ):
compat_HTMLParser = compat_html_parser_HTMLParser = html.parser.HTMLParser
compat_http_client = http.client
compat_http_server = http.server
compat_HTTPError = urllib.error.HTTPError
compat_input = input
compat_integer_types = (int, )
compat_itertools_count = itertools.count
Expand All @@ -88,7 +88,7 @@ def compat_setenv(key, value, env=os.environ):
compat_subprocess_get_DEVNULL = lambda: subprocess.DEVNULL
compat_tokenize_tokenize = tokenize.tokenize
compat_urllib_error = urllib.error
compat_urllib_HTTPError = urllib.error.HTTPError
compat_urllib_HTTPError = compat_HTTPError
compat_urllib_parse = urllib.parse
compat_urllib_parse_parse_qs = urllib.parse.parse_qs
compat_urllib_parse_quote = urllib.parse.quote
Expand Down
7 changes: 5 additions & 2 deletions yt_dlp/downloader/hls.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,10 @@ def fin_fragments():

return output.getvalue().encode()

self.download_and_append_fragments(
ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments)
if len(fragments) == 1:
self.download_and_append_fragments(ctx, fragments, info_dict)
else:
self.download_and_append_fragments(
ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments)
else:
return self.download_and_append_fragments(ctx, fragments, info_dict)
8 changes: 8 additions & 0 deletions yt_dlp/extractor/_extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1111,6 +1111,7 @@
MotherlessIE,
MotherlessGroupIE,
MotherlessGalleryIE,
MotherlessUploaderIE,
)
from .motorsport import MotorsportIE
from .moviepilot import MoviepilotIE
Expand All @@ -1137,6 +1138,11 @@
MusicdexArtistIE,
MusicdexPlaylistIE,
)
from .mx3 import (
Mx3IE,
Mx3NeoIE,
Mx3VolksmusikIE,
)
from .mxplayer import (
MxplayerIE,
MxplayerShowIE,
Expand Down Expand Up @@ -1593,6 +1599,7 @@
RedBullIE,
)
from .reddit import RedditIE
from .redge import RedCDNLivxIE
from .redgifs import (
RedGifsIE,
RedGifsSearchIE,
Expand Down Expand Up @@ -1727,6 +1734,7 @@
)
from .scrolller import ScrolllerIE
from .seeker import SeekerIE
from .sejmpl import SejmIE
from .senalcolombia import SenalColombiaLiveIE
from .senategov import SenateISVPIE, SenateGovIE
from .sendtonews import SendtoNewsIE
Expand Down
27 changes: 22 additions & 5 deletions yt_dlp/extractor/ard.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
determine_ext,
int_or_none,
join_nonempty,
jwt_decode_hs256,
make_archive_id,
parse_duration,
parse_iso8601,
Expand Down Expand Up @@ -238,6 +239,7 @@ class ARDBetaMediathekIE(InfoExtractor):
(?P<id>[a-zA-Z0-9]+)
/?(?:[?#]|$)'''
_GEO_COUNTRIES = ['DE']
_TOKEN_URL = 'https://sso.ardmediathek.de/sso/token'

_TESTS = [{
'url': 'https://www.ardmediathek.de/video/filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen/Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0',
Expand Down Expand Up @@ -359,12 +361,27 @@ def _extract_episode_info(self, title):

def _real_extract(self, url):
display_id = self._match_id(url)
query = {'embedded': 'false', 'mcV6': 'true'}
headers = {}

if self._get_cookies(self._TOKEN_URL).get('ams'):
token = self._download_json(
self._TOKEN_URL, display_id, 'Fetching token for age verification',
'Unable to fetch age verification token', fatal=False)
id_token = traverse_obj(token, ('idToken', {str}))
decoded_token = traverse_obj(id_token, ({jwt_decode_hs256}, {dict}))
user_id = traverse_obj(decoded_token, (('user_id', 'sub'), {str}), get_all=False)
if not user_id:
self.report_warning('Unable to extract token, continuing without authentication')
else:
headers['x-authorization'] = f'Bearer {id_token}'
query['userId'] = user_id
if decoded_token.get('age_rating') != 18:
self.report_warning('Account is not verified as 18+; video may be unavailable')

page_data = self._download_json(
f'https://api.ardmediathek.de/page-gateway/pages/ard/item/{display_id}', display_id, query={
'embedded': 'false',
'mcV6': 'true',
})
f'https://api.ardmediathek.de/page-gateway/pages/ard/item/{display_id}',
display_id, query=query, headers=headers)

# For user convenience we use the old contentId instead of the longer crid
# Ref: https://github.com/yt-dlp/yt-dlp/issues/8731#issuecomment-1874398283
Expand All @@ -383,7 +400,7 @@ def _real_extract(self, url):
media_data = traverse_obj(player_data, ('mediaCollection', 'embedded', {dict}))

if player_data.get('blockedByFsk'):
self.raise_no_formats('This video is only available after 22:00', expected=True)
self.raise_login_required('This video is only available for age verified users or after 22:00')

formats = []
subtitles = {}
Expand Down
31 changes: 28 additions & 3 deletions yt_dlp/extractor/motherless.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ def _real_extract(self, url):


class MotherlessPaginatedIE(InfoExtractor):
_EXTRA_QUERY = {}
_PAGE_SIZE = 60

def _correct_path(self, url, item_id):
Expand All @@ -199,7 +200,7 @@ def _real_extract(self, url):
def get_page(idx):
page = idx + 1
current_page = webpage if not idx else self._download_webpage(
real_url, item_id, note=f'Downloading page {page}', query={'page': page})
real_url, item_id, note=f'Downloading page {page}', query={'page': page, **self._EXTRA_QUERY})
yield from self._extract_entries(current_page, real_url)

return self.playlist_result(
Expand All @@ -213,7 +214,7 @@ class MotherlessGroupIE(MotherlessPaginatedIE):
'url': 'http://motherless.com/gv/movie_scenes',
'info_dict': {
'id': 'movie_scenes',
'title': 'Movie Scenes',
'title': 'Movie Scenes - Videos - Hot and sexy scenes from "regular" movies... Beautiful actresses fully',
},
'playlist_mincount': 540,
}, {
Expand Down Expand Up @@ -244,7 +245,7 @@ class MotherlessGalleryIE(MotherlessPaginatedIE):
'id': '338999F',
'title': 'Random',
},
'playlist_mincount': 190,
'playlist_mincount': 171,
}, {
'url': 'https://motherless.com/GVABD6213',
'info_dict': {
Expand All @@ -270,3 +271,27 @@ class MotherlessGalleryIE(MotherlessPaginatedIE):

def _correct_path(self, url, item_id):
return urllib.parse.urljoin(url, f'/GV{item_id}')


class MotherlessUploaderIE(MotherlessPaginatedIE):
    # Playlist extractor for a Motherless uploader page (``/u/<name>``).

    # Merged into the query of every follow-up page request by
    # MotherlessPaginatedIE (alongside the ``page`` number).
    # NOTE(review): ``t=v`` presumably selects the "Videos" listing, going by
    # the expected playlist titles below - confirm against the site.
    _EXTRA_QUERY = {'t': 'v'}

    _VALID_URL = r'https?://(?:www\.)?motherless\.com/u/(?P<id>\w+)/?(?:$|[?#])'

    _TESTS = [
        {
            'url': 'https://motherless.com/u/Mrgo4hrs2023',
            'info_dict': {
                'id': 'Mrgo4hrs2023',
                'title': "Mrgo4hrs2023's Uploads - Videos",
            },
            'playlist_mincount': 32,
        },
        {
            'url': 'https://motherless.com/u/Happy_couple?t=v',
            'info_dict': {
                'id': 'Happy_couple',
                'title': "Happy_couple's Uploads - Videos",
            },
            'playlist_mincount': 8,
        },
    ]

    def _correct_path(self, url, item_id):
        # Build the uploader listing URL on the same scheme/host as ``url``,
        # always carrying the ``t=v`` query regardless of what the input had.
        uploads_path = f'/u/{item_id}?t=v'
        return urllib.parse.urljoin(url, uploads_path)
Loading

0 comments on commit ca0db11

Please sign in to comment.