Merge branch 'yt-dlp:master' into master

observeroftime01 · Jan 22, 2024 · commit ca0db11
2 parents a93b679 + f0e8bc7

Showing 16 changed files with 626 additions and 231 deletions.
diff --git a/test/test_networking.py b/test/test_networking.py
@@ -180,6 +180,12 @@ def do_GET(self):
             self.send_header('Location', '/a/b/./../../headers')
             self.send_header('Content-Length', '0')
             self.end_headers()
+        elif self.path == '/redirect_dotsegments_absolute':
+            self.send_response(301)
+            # redirect to /headers but with dot segments before - absolute url
+            self.send_header('Location', f'http://127.0.0.1:{http_server_port(self.server)}/a/b/./../../headers')
+            self.send_header('Content-Length', '0')
+            self.end_headers()
         elif self.path.startswith('/redirect_'):
             self._redirect()
         elif self.path.startswith('/method'):
@@ -345,16 +351,17 @@ def test_percent_encode(self, handler):
             res.close()
 
     @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
-    def test_remove_dot_segments(self, handler):
-        with handler() as rh:
+    @pytest.mark.parametrize('path', [
+        '/a/b/./../../headers',
+        '/redirect_dotsegments',
+        # https://github.com/yt-dlp/yt-dlp/issues/9020
+        '/redirect_dotsegments_absolute',
+    ])
+    def test_remove_dot_segments(self, handler, path):
+        with handler(verbose=True) as rh:
             # This isn't a comprehensive test,
-            # but it should be enough to check whether the handler is removing dot segments
-            res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/a/b/./../../headers'))
-            assert res.status == 200
-            assert res.url == f'http://127.0.0.1:{self.http_port}/headers'
-            res.close()
-
-            res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_dotsegments'))
+            # but it should be enough to check whether the handler is removing dot segments in required scenarios
+            res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}{path}'))
             assert res.status == 200
             assert res.url == f'http://127.0.0.1:{self.http_port}/headers'
             res.close()

diff --git a/test/test_networking_utils.py b/test/test_networking_utils.py
@@ -8,13 +8,9 @@
 
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-import contextlib
 import io
-import platform
 import random
 import ssl
-import urllib.error
-import warnings
 
 from yt_dlp.cookies import YoutubeDLCookieJar
 from yt_dlp.dependencies import certifi
@@ -30,7 +26,6 @@
 from yt_dlp.networking.exceptions import (
     HTTPError,
     IncompleteRead,
-    _CompatHTTPError,
 )
 from yt_dlp.socks import ProxyType
 from yt_dlp.utils.networking import HTTPHeaderDict
@@ -179,11 +174,10 @@ class TestNetworkingExceptions:
     def create_response(status):
         return Response(fp=io.BytesIO(b'test'), url='http://example.com', headers={'tesT': 'test'}, status=status)
 
-    @pytest.mark.parametrize('http_error_class', [HTTPError, lambda r: _CompatHTTPError(HTTPError(r))])
-    def test_http_error(self, http_error_class):
+    def test_http_error(self):
 
         response = self.create_response(403)
-        error = http_error_class(response)
+        error = HTTPError(response)
 
         assert error.status == 403
         assert str(error) == error.msg == 'HTTP Error 403: Forbidden'
@@ -194,80 +188,12 @@ def test_http_error(self, http_error_class):
         assert data == b'test'
         assert repr(error) == '<HTTPError 403: Forbidden>'
 
-    @pytest.mark.parametrize('http_error_class', [HTTPError, lambda *args, **kwargs: _CompatHTTPError(HTTPError(*args, **kwargs))])
-    def test_redirect_http_error(self, http_error_class):
+    def test_redirect_http_error(self):
         response = self.create_response(301)
-        error = http_error_class(response, redirect_loop=True)
+        error = HTTPError(response, redirect_loop=True)
         assert str(error) == error.msg == 'HTTP Error 301: Moved Permanently (redirect loop detected)'
         assert error.reason == 'Moved Permanently'
 
-    def test_compat_http_error(self):
-        response = self.create_response(403)
-        error = _CompatHTTPError(HTTPError(response))
-        assert isinstance(error, HTTPError)
-        assert isinstance(error, urllib.error.HTTPError)
-
-        @contextlib.contextmanager
-        def raises_deprecation_warning():
-            with warnings.catch_warnings(record=True) as w:
-                warnings.simplefilter('always')
-                yield
-
-                if len(w) == 0:
-                    pytest.fail('Did not raise DeprecationWarning')
-                if len(w) > 1:
-                    pytest.fail(f'Raised multiple warnings: {w}')
-
-                if not issubclass(w[-1].category, DeprecationWarning):
-                    pytest.fail(f'Expected DeprecationWarning, got {w[-1].category}')
-                w.clear()
-
-        with raises_deprecation_warning():
-            assert error.code == 403
-
-        with raises_deprecation_warning():
-            assert error.getcode() == 403
-
-        with raises_deprecation_warning():
-            assert error.hdrs is error.response.headers
-
-        with raises_deprecation_warning():
-            assert error.info() is error.response.headers
-
-        with raises_deprecation_warning():
-            assert error.headers is error.response.headers
-
-        with raises_deprecation_warning():
-            assert error.filename == error.response.url
-
-        with raises_deprecation_warning():
-            assert error.url == error.response.url
-
-        with raises_deprecation_warning():
-            assert error.geturl() == error.response.url
-
-        # Passthrough file operations
-        with raises_deprecation_warning():
-            assert error.read() == b'test'
-
-        with raises_deprecation_warning():
-            assert not error.closed
-
-        with raises_deprecation_warning():
-            # Technically Response operations are also passed through, which should not be used.
-            assert error.get_header('test') == 'test'
-
-        # Should not raise a warning
-        error.close()
-
-    @pytest.mark.skipif(
-        platform.python_implementation() == 'PyPy', reason='garbage collector works differently in pypy')
-    def test_compat_http_error_autoclose(self):
-        # Compat HTTPError should not autoclose response
-        response = self.create_response(403)
-        _CompatHTTPError(HTTPError(response))
-        assert not response.closed
-
     def test_incomplete_read_error(self):
         error = IncompleteRead(4, 3, cause='test')
         assert isinstance(error, IncompleteRead)

diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
@@ -40,7 +40,6 @@
     NoSupportingHandlers,
     RequestError,
     SSLError,
-    _CompatHTTPError,
     network_exceptions,
 )
 from .plugins import directories as plugin_directories
@@ -4112,8 +4111,6 @@ def urlopen(self, req):
                     'SSLV3_ALERT_HANDSHAKE_FAILURE: The server may not support the current cipher list. '
                     'Try using --legacy-server-connect', cause=e) from e
             raise
-        except HTTPError as e:  # TODO: Remove in a future release
-            raise _CompatHTTPError(e) from e
 
     def build_request_director(self, handlers, preferences=None):
         logger = _YDLLogger(self)

diff --git a/yt_dlp/compat/_legacy.py b/yt_dlp/compat/_legacy.py
@@ -35,6 +35,7 @@
 from ..dependencies import brotli as compat_brotli  # noqa: F401
 from ..dependencies import websockets as compat_websockets  # noqa: F401
 from ..dependencies.Cryptodome import AES as compat_pycrypto_AES  # noqa: F401
+from ..networking.exceptions import HTTPError as compat_HTTPError  # noqa: F401
 
 passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 'windows_enable_vt_mode'))
 
@@ -70,7 +71,6 @@ def compat_setenv(key, value, env=os.environ):
 compat_HTMLParser = compat_html_parser_HTMLParser = html.parser.HTMLParser
 compat_http_client = http.client
 compat_http_server = http.server
-compat_HTTPError = urllib.error.HTTPError
 compat_input = input
 compat_integer_types = (int, )
 compat_itertools_count = itertools.count
@@ -88,7 +88,7 @@ def compat_setenv(key, value, env=os.environ):
 compat_subprocess_get_DEVNULL = lambda: subprocess.DEVNULL
 compat_tokenize_tokenize = tokenize.tokenize
 compat_urllib_error = urllib.error
-compat_urllib_HTTPError = urllib.error.HTTPError
+compat_urllib_HTTPError = compat_HTTPError
 compat_urllib_parse = urllib.parse
 compat_urllib_parse_parse_qs = urllib.parse.parse_qs
 compat_urllib_parse_quote = urllib.parse.quote

diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py
@@ -369,7 +369,10 @@ def fin_fragments():
 
                 return output.getvalue().encode()
 
-            self.download_and_append_fragments(
-                ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments)
+            if len(fragments) == 1:
+                self.download_and_append_fragments(ctx, fragments, info_dict)
+            else:
+                self.download_and_append_fragments(
+                    ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments)
         else:
             return self.download_and_append_fragments(ctx, fragments, info_dict)
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
@@ -1111,6 +1111,7 @@
     MotherlessIE,
     MotherlessGroupIE,
     MotherlessGalleryIE,
+    MotherlessUploaderIE,
 )
 from .motorsport import MotorsportIE
 from .moviepilot import MoviepilotIE
@@ -1137,6 +1138,11 @@
     MusicdexArtistIE,
     MusicdexPlaylistIE,
 )
+from .mx3 import (
+    Mx3IE,
+    Mx3NeoIE,
+    Mx3VolksmusikIE,
+)
 from .mxplayer import (
     MxplayerIE,
     MxplayerShowIE,
@@ -1593,6 +1599,7 @@
     RedBullIE,
 )
 from .reddit import RedditIE
+from .redge import RedCDNLivxIE
 from .redgifs import (
     RedGifsIE,
     RedGifsSearchIE,
@@ -1727,6 +1734,7 @@
 )
 from .scrolller import ScrolllerIE
 from .seeker import SeekerIE
+from .sejmpl import SejmIE
 from .senalcolombia import SenalColombiaLiveIE
 from .senategov import SenateISVPIE, SenateGovIE
 from .sendtonews import SendtoNewsIE

diff --git a/yt_dlp/extractor/ard.py b/yt_dlp/extractor/ard.py
@@ -8,6 +8,7 @@
     determine_ext,
     int_or_none,
     join_nonempty,
+    jwt_decode_hs256,
     make_archive_id,
     parse_duration,
     parse_iso8601,
@@ -238,6 +239,7 @@ class ARDBetaMediathekIE(InfoExtractor):
         (?P<id>[a-zA-Z0-9]+)
         /?(?:[?#]|$)'''
     _GEO_COUNTRIES = ['DE']
+    _TOKEN_URL = 'https://sso.ardmediathek.de/sso/token'
 
     _TESTS = [{
         'url': 'https://www.ardmediathek.de/video/filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen/Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0',
@@ -359,12 +361,27 @@ def _extract_episode_info(self, title):
 
     def _real_extract(self, url):
         display_id = self._match_id(url)
+        query = {'embedded': 'false', 'mcV6': 'true'}
+        headers = {}
+
+        if self._get_cookies(self._TOKEN_URL).get('ams'):
+            token = self._download_json(
+                self._TOKEN_URL, display_id, 'Fetching token for age verification',
+                'Unable to fetch age verification token', fatal=False)
+            id_token = traverse_obj(token, ('idToken', {str}))
+            decoded_token = traverse_obj(id_token, ({jwt_decode_hs256}, {dict}))
+            user_id = traverse_obj(decoded_token, (('user_id', 'sub'), {str}), get_all=False)
+            if not user_id:
+                self.report_warning('Unable to extract token, continuing without authentication')
+            else:
+                headers['x-authorization'] = f'Bearer {id_token}'
+                query['userId'] = user_id
+                if decoded_token.get('age_rating') != 18:
+                    self.report_warning('Account is not verified as 18+; video may be unavailable')
 
         page_data = self._download_json(
-            f'https://api.ardmediathek.de/page-gateway/pages/ard/item/{display_id}', display_id, query={
-                'embedded': 'false',
-                'mcV6': 'true',
-            })
+            f'https://api.ardmediathek.de/page-gateway/pages/ard/item/{display_id}',
+            display_id, query=query, headers=headers)
 
         # For user convenience we use the old contentId instead of the longer crid
         # Ref: https://github.com/yt-dlp/yt-dlp/issues/8731#issuecomment-1874398283
@@ -383,7 +400,7 @@ def _real_extract(self, url):
         media_data = traverse_obj(player_data, ('mediaCollection', 'embedded', {dict}))
 
         if player_data.get('blockedByFsk'):
-            self.raise_no_formats('This video is only available after 22:00', expected=True)
+            self.raise_login_required('This video is only available for age verified users or after 22:00')
 
         formats = []
         subtitles = {}

diff --git a/yt_dlp/extractor/motherless.py b/yt_dlp/extractor/motherless.py
@@ -177,6 +177,7 @@ def _real_extract(self, url):
 
 
 class MotherlessPaginatedIE(InfoExtractor):
+    _EXTRA_QUERY = {}
     _PAGE_SIZE = 60
 
     def _correct_path(self, url, item_id):
@@ -199,7 +200,7 @@ def _real_extract(self, url):
         def get_page(idx):
             page = idx + 1
             current_page = webpage if not idx else self._download_webpage(
-                real_url, item_id, note=f'Downloading page {page}', query={'page': page})
+                real_url, item_id, note=f'Downloading page {page}', query={'page': page, **self._EXTRA_QUERY})
             yield from self._extract_entries(current_page, real_url)
 
         return self.playlist_result(
@@ -213,7 +214,7 @@ class MotherlessGroupIE(MotherlessPaginatedIE):
         'url': 'http://motherless.com/gv/movie_scenes',
         'info_dict': {
             'id': 'movie_scenes',
-            'title': 'Movie Scenes',
+            'title': 'Movie Scenes - Videos - Hot and sexy scenes from "regular" movies... Beautiful actresses fully',
         },
         'playlist_mincount': 540,
     }, {
@@ -244,7 +245,7 @@ class MotherlessGalleryIE(MotherlessPaginatedIE):
             'id': '338999F',
             'title': 'Random',
         },
-        'playlist_mincount': 190,
+        'playlist_mincount': 171,
     }, {
         'url': 'https://motherless.com/GVABD6213',
         'info_dict': {
@@ -270,3 +271,27 @@ class MotherlessGalleryIE(MotherlessPaginatedIE):
 
     def _correct_path(self, url, item_id):
         return urllib.parse.urljoin(url, f'/GV{item_id}')
+
+
+class MotherlessUploaderIE(MotherlessPaginatedIE):
+    _VALID_URL = r'https?://(?:www\.)?motherless\.com/u/(?P<id>\w+)/?(?:$|[?#])'
+    _TESTS = [{
+        'url': 'https://motherless.com/u/Mrgo4hrs2023',
+        'info_dict': {
+            'id': 'Mrgo4hrs2023',
+            'title': "Mrgo4hrs2023's Uploads - Videos",
+        },
+        'playlist_mincount': 32,
+    }, {
+        'url': 'https://motherless.com/u/Happy_couple?t=v',
+        'info_dict': {
+            'id': 'Happy_couple',
+            'title': "Happy_couple's Uploads - Videos",
+        },
+        'playlist_mincount': 8,
+    }]
+
+    _EXTRA_QUERY = {'t': 'v'}
+
+    def _correct_path(self, url, item_id):
+        return urllib.parse.urljoin(url, f'/u/{item_id}?t=v')