Merge branch 'yt-dlp:master' into master

observeroftime01 · Sep 3, 2024 · 0b6f99e · 0b6f99e
2 parents 4225f88 + e8e6a98
commit 0b6f99e
Show file tree

Hide file tree

Showing 25 changed files with 616 additions and 132 deletions.
diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.yml b/.github/ISSUE_TEMPLATE/1_broken_site.yml
@@ -77,3 +77,8 @@ body:
       render: shell
     validations:
       required: true
+  - type: markdown
+    attributes:
+      value: |
+        ### NOTE: Due to a recent increase in malicious spam activity, this issue will be automatically locked until it is triaged by a maintainer.
+        ### If you receive any replies asking you to download a file, do NOT follow the download links!
diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.yml b/.github/ISSUE_TEMPLATE/2_site_support_request.yml
@@ -89,3 +89,8 @@ body:
       render: shell
     validations:
       required: true
+  - type: markdown
+    attributes:
+      value: |
+        ### NOTE: Due to a recent increase in malicious spam activity, this issue will be automatically locked until it is triaged by a maintainer.
+        ### If you receive any replies asking you to download a file, do NOT follow the download links!
diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml
@@ -85,3 +85,8 @@ body:
       render: shell
     validations:
       required: true
+  - type: markdown
+    attributes:
+      value: |
+        ### NOTE: Due to a recent increase in malicious spam activity, this issue will be automatically locked until it is triaged by a maintainer.
+        ### If you receive any replies asking you to download a file, do NOT follow the download links!
diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.yml b/.github/ISSUE_TEMPLATE/4_bug_report.yml
@@ -70,3 +70,8 @@ body:
       render: shell
     validations:
       required: true
+  - type: markdown
+    attributes:
+      value: |
+        ### NOTE: Due to a recent increase in malicious spam activity, this issue will be automatically locked until it is triaged by a maintainer.
+        ### If you receive any replies asking you to download a file, do NOT follow the download links!
diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.yml b/.github/ISSUE_TEMPLATE/5_feature_request.yml
@@ -64,3 +64,8 @@ body:
         [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
         <more lines>
       render: shell
+  - type: markdown
+    attributes:
+      value: |
+        ### NOTE: Due to a recent increase in malicious spam activity, this issue will be automatically locked until it is triaged by a maintainer.
+        ### If you receive any replies asking you to download a file, do NOT follow the download links!
diff --git a/.github/ISSUE_TEMPLATE/6_question.yml b/.github/ISSUE_TEMPLATE/6_question.yml
@@ -70,3 +70,8 @@ body:
         [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
         <more lines>
       render: shell
+  - type: markdown
+    attributes:
+      value: |
+        ### NOTE: Due to a recent increase in malicious spam activity, this issue will be automatically locked until it is triaged by a maintainer.
+        ### If you receive any replies asking you to download a file, do NOT follow the download links!
diff --git a/.github/workflows/antispam.yaml b/.github/workflows/antispam.yaml
@@ -0,0 +1,20 @@
+name: Anti-Spam  # workflow that locks every newly opened issue
+on:
+  issues:
+    types: [opened]  # fires only for the "opened" issue event
+
+permissions:
+  issues: write  # the only permission requested for the workflow token
+
+jobs:
+  lockdown:
+    name: Issue Lockdown
+    runs-on: ubuntu-latest
+    steps:
+      - name: "Lock new issue"
+        env:  # values are handed to the script as environment variables
+          GH_TOKEN: ${{ github.token }}  # read by the gh CLI for authentication
+          ISSUE_NUMBER: ${{ github.event.issue.number }}  # number of the issue that triggered the run
+          REPOSITORY: ${{ github.repository }}  # repository passed to gh via -R
+        run: |  # locks the triggering issue, giving "too_heated" as the lock reason
+          gh issue lock "${ISSUE_NUMBER}" -r too_heated -R "${REPOSITORY}"
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -266,7 +266,7 @@ jobs:
           # We need to ignore wheels otherwise we break universal2 builds
           python3 -m pip install -U --no-binary :all: -r requirements.txt
           # We need to fuse our own universal2 wheels for curl_cffi
-          python3 -m pip install -U delocate
+          python3 -m pip install -U 'delocate==0.11.0'
           mkdir curl_cffi_whls curl_cffi_universal2
           python3 devscripts/install_deps.py --print -o --include curl-cffi > requirements.txt
           for platform in "macosx_11_0_arm64" "macosx_11_0_x86_64"; do

diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml
@@ -35,6 +35,7 @@ jobs:
     - name: Install test requirements
       run: python3 ./devscripts/install_deps.py --include test --include curl-cffi
     - name: Run tests
+      timeout-minutes: 15
       continue-on-error: False
       run: |
         python3 -m yt_dlp -v || true  # Print debug head

diff --git a/.github/workflows/quick-test.yml b/.github/workflows/quick-test.yml
@@ -17,6 +17,7 @@ jobs:
     - name: Install test requirements
       run: python3 ./devscripts/install_deps.py --include test
     - name: Run tests
+      timeout-minutes: 15
       run: |
         python3 -m yt_dlp -v || true
         python3 ./devscripts/run_tests.py core

diff --git a/devscripts/make_issue_template.py b/devscripts/make_issue_template.py
@@ -46,6 +46,11 @@
       render: shell
     validations:
       required: true
+  - type: markdown
+    attributes:
+      value: |
+        ### NOTE: Due to a recent increase in malicious spam activity, this issue will be automatically locked until it is triaged by a maintainer.
+        ### If you receive any replies asking you to download a file, do NOT follow the download links!
 '''.strip()
 
 NO_SKIP = '''

diff --git a/pyproject.toml b/pyproject.toml
@@ -49,7 +49,7 @@ dependencies = [
     "pycryptodomex",
     "requests>=2.32.2,<3",
     "urllib3>=1.26.17,<3",
-    "websockets>=12.0",
+    "websockets>=13.0",
 ]
 
 [project.optional-dependencies]

diff --git a/test/test_websockets.py b/test/test_websockets.py
@@ -88,7 +88,7 @@ def create_wss_websocket_server():
     certfn = os.path.join(TEST_DIR, 'testcert.pem')
     sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
     sslctx.load_cert_chain(certfn, None)
-    return create_websocket_server(ssl_context=sslctx)
+    return create_websocket_server(ssl=sslctx)
 
 
 MTLS_CERT_DIR = os.path.join(TEST_DIR, 'testdata', 'certificate')
@@ -103,7 +103,7 @@ def create_mtls_wss_websocket_server():
     sslctx.load_verify_locations(cafile=cacertfn)
     sslctx.load_cert_chain(certfn, None)
 
-    return create_websocket_server(ssl_context=sslctx)
+    return create_websocket_server(ssl=sslctx)
 
 
 def create_legacy_wss_websocket_server():
@@ -112,7 +112,7 @@ def create_legacy_wss_websocket_server():
     sslctx.maximum_version = ssl.TLSVersion.TLSv1_2
     sslctx.set_ciphers('SHA1:AESCCM:aDSS:eNULL:aNULL')
     sslctx.load_cert_chain(certfn, None)
-    return create_websocket_server(ssl_context=sslctx)
+    return create_websocket_server(ssl=sslctx)
 
 
 def ws_validate_and_send(rh, req):
@@ -139,7 +139,7 @@ def setup_class(cls):
         cls.wss_thread, cls.wss_port = create_wss_websocket_server()
         cls.wss_base_url = f'wss://127.0.0.1:{cls.wss_port}'
 
-        cls.bad_wss_thread, cls.bad_wss_port = create_websocket_server(ssl_context=ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER))
+        cls.bad_wss_thread, cls.bad_wss_port = create_websocket_server(ssl=ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER))
         cls.bad_wss_host = f'wss://127.0.0.1:{cls.bad_wss_port}'
 
         cls.mtls_wss_thread, cls.mtls_wss_port = create_mtls_wss_websocket_server()

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
@@ -945,6 +945,7 @@
 )
 from .kicker import KickerIE
 from .kickstarter import KickStarterIE
+from .kika import KikaIE
 from .kinja import KinjaEmbedIE
 from .kinopoisk import KinoPoiskIE
 from .kommunetv import KommunetvIE
@@ -2311,6 +2312,7 @@
     VideomoreVideoIE,
 )
 from .videopress import VideoPressIE
+from .vidflex import VidflexIE
 from .vidio import (
     VidioIE,
     VidioLiveIE,

diff --git a/yt_dlp/extractor/asobistage.py b/yt_dlp/extractor/asobistage.py
@@ -101,9 +101,10 @@ def _real_initialize(self):
         self._HEADERS['Authorization'] = f'Bearer {token}'
 
     def _real_extract(self, url):
-        video_id, event, type_, slug = self._match_valid_url(url).group('id', 'event', 'type', 'slug')
+        webpage, urlh = self._download_webpage_handle(url, self._match_id(url))
+        video_id, event, type_, slug = self._match_valid_url(urlh.url).group('id', 'event', 'type', 'slug')
         video_type = {'archive': 'archives', 'player': 'broadcasts'}[type_]
-        webpage = self._download_webpage(url, video_id)
+
         event_data = traverse_obj(
             self._search_nextjs_data(webpage, video_id, default={}),
             ('props', 'pageProps', 'eventCMSData', {

diff --git a/yt_dlp/extractor/eurosport.py b/yt_dlp/extractor/eurosport.py
@@ -3,7 +3,12 @@
 
 
 class EurosportIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.eurosport\.com/\w+/(?:[\w-]+/[\d-]+/)?[\w-]+_(?P<id>vid\d+)'
+    _VALID_URL = r'''(?x)
+        https?://(?:
+            (?:(?:www|espanol)\.)?eurosport\.(?:com(?:\.tr)?|de|dk|es|fr|hu|it|nl|no|ro)|
+            eurosport\.tvn24\.pl
+        )/[\w-]+/(?:[\w-]+/[\d-]+/)?[\w.-]+_(?P<id>vid\d+)
+    '''
     _TESTS = [{
         'url': 'https://www.eurosport.com/tennis/roland-garros/2022/highlights-rafael-nadal-brushes-aside-caper-ruud-to-win-record-extending-14th-french-open-title_vid1694147/video.shtml',
         'info_dict': {
@@ -70,13 +75,50 @@ class EurosportIE(InfoExtractor):
             'duration': 105.0,
             'upload_date': '20230518',
         },
+    }, {
+        'url': 'https://www.eurosport.de/radsport/vuelta-a-espana/2024/vuelta-a-espana-2024-wout-van-aert-und-co.-verzweifeln-an-mcnulty-zeitfahr-krimi-in-lissabon_vid2219478/video.shtml',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.eurosport.dk/speedway/mikkel-michelsen-misser-finalen-i-cardiff-se-danskeren-i-semifinalen-her_vid2219363/video.shtml',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.eurosport.nl/mixed-martial-arts/ufc/2022/ufc-305-respect-tussen-adesanya-en-du-plessis_vid2219650/video.shtml',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.eurosport.es/ciclismo/la-vuelta-2024-carlos-rodriguez-olvida-la-crono-y-ya-espera-que-llegue-la-montana-no-me-encontre-nada-comodo_vid2219682/video.shtml',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.eurosport.fr/football/supercoupe-d-europe/2024-2025/kylian-mbappe-vinicius-junior-eduardo-camavinga-touche.-extraits-de-l-entrainement-du-real-madrid-en-video_vid2216993/video.shtml',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.eurosport.it/calcio/serie-a/2024-2025/samardzic-a-bergamo-per-le-visite-mediche-con-l-atalanta_vid2219680/video.shtml',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.eurosport.hu/kerekpar/vuelta-a-espana/2024/dramai-harc-a-masodpercekert-meglepetesgyoztes-a-vuelta-nyitoszakaszan_vid2219481/video.shtml',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.eurosport.no/golf/fedex-st-jude-championship/2024/ligger-pa-andreplass-sa-skjer-dette-drama_vid30000618/video.shtml',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.eurosport.no/golf/fedex-st-jude-championship/2024/ligger-pa-andreplass-sa-skjer-dette-drama_vid2219531/video.shtml',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.eurosport.ro/tenis/western-southern-open-2/2024/rezumatul-partidei-dintre-zverev-si-shelton-de-la-cincinnati_vid2219657/video.shtml',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.eurosport.com.tr/hentbol/olympic-games-paris-2024/2024/paris-2024-denmark-ile-germany-olimpiyatlarin-onemli-anlari_vid2215836/video.shtml',
+        'only_matching': True,
+    }, {
+        'url': 'https://eurosport.tvn24.pl/kolarstwo/tour-de-france-kobiet/2024/kasia-niewiadoma-przed-ostatnim-8.-etapem-tour-de-france-kobiet_vid2219765/video.shtml',
+        'only_matching': True,
     }]
 
     _TOKEN = None
 
     # actually defined in https://netsport.eurosport.io/?variables={"databaseId":<databaseId>,"playoutType":"VDP"}&extensions={"persistedQuery":{"version":1 ..
     # but this method require to get sha256 hash
     _GEO_COUNTRIES = ['DE', 'NL', 'EU', 'IT', 'FR']  # Not complete list but it should work
+    _GEO_BYPASS = False
 
     def _real_initialize(self):
         if EurosportIE._TOKEN is None:
@@ -98,13 +140,13 @@ def _real_extract(self, url):
         for stream_type in json_data['attributes']['streaming']:
             if stream_type == 'hls':
                 fmts, subs = self._extract_m3u8_formats_and_subtitles(
-                    traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, ext='mp4')
+                    traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, ext='mp4', fatal=False)
             elif stream_type == 'dash':
                 fmts, subs = self._extract_mpd_formats_and_subtitles(
-                    traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id)
+                    traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, fatal=False)
             elif stream_type == 'mss':
                 fmts, subs = self._extract_ism_formats_and_subtitles(
-                    traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id)
+                    traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, fatal=False)
 
             formats.extend(fmts)
             self._merge_subtitles(subs, target=subtitles)

diff --git a/yt_dlp/extractor/kika.py b/yt_dlp/extractor/kika.py
@@ -0,0 +1,126 @@
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    int_or_none,
+    parse_duration,
+    parse_iso8601,
+    url_or_none,
+)
+from ..utils.traversal import traverse_obj
+
+
+class KikaIE(InfoExtractor):  # extractor for video pages on kika.de
+    IE_DESC = 'KiKA.de'
+    _VALID_URL = r'https?://(?:www\.)?kika\.de/[\w/-]+/videos/(?P<id>[a-z-]+\d+)'  # id = lowercase letters/hyphens followed by digits
+    _GEO_COUNTRIES = ['DE']
+
+    _TESTS = [{
+        'url': 'https://www.kika.de/logo/videos/logo-vom-samstag-einunddreissig-august-zweitausendvierundzwanzig-100',
+        'md5': 'fbfc8da483719ef06f396e5e5b938c69',
+        'info_dict': {
+            'id': 'logo-vom-samstag-einunddreissig-august-zweitausendvierundzwanzig-100',
+            'ext': 'mp4',
+            'upload_date': '20240831',
+            'timestamp': 1725126600,
+            'season_number': 2024,
+            'modified_date': '20240831',
+            'episode': 'Episode 476',
+            'episode_number': 476,
+            'season': 'Season 2024',
+            'duration': 634,
+            'title': 'logo! vom Samstag, 31. August 2024',
+            'modified_timestamp': 1725129983,
+        },
+    }, {
+        'url': 'https://www.kika.de/kaltstart/videos/video92498',
+        'md5': '710ece827e5055094afeb474beacb7aa',
+        'info_dict': {
+            'id': 'video92498',
+            'ext': 'mp4',
+            'title': '7. Wo ist Leo?',
+            'description': 'md5:fb48396a5b75068bcac1df74f1524920',
+            'duration': 436,
+            'timestamp': 1702926876,
+            'upload_date': '20231218',
+            'episode_number': 7,
+            'modified_date': '20240319',
+            'modified_timestamp': 1710880610,
+            'episode': 'Episode 7',
+            'season_number': 1,
+            'season': 'Season 1',
+        },
+    }, {
+        'url': 'https://www.kika.de/bernd-das-brot/astrobrot/videos/video90088',
+        'md5': 'ffd1b700d7de0a6616a1d08544c77294',
+        'info_dict': {
+            'id': 'video90088',
+            'ext': 'mp4',
+            'upload_date': '20221102',
+            'timestamp': 1667390580,
+            'duration': 197,
+            'modified_timestamp': 1711093771,
+            'episode_number': 8,
+            'title': 'Es ist nicht leicht, ein Astrobrot zu sein',
+            'modified_date': '20240322',
+            'description': 'md5:d3641deaf1b5515a160788b2be4159a9',
+            'season_number': 1,
+            'episode': 'Episode 8',
+            'season': 'Season 1',
+        },
+    }]
+
+    def _real_extract(self, url):  # builds the info dict for one video URL
+        video_id = self._match_id(url)
+
+        doc = self._download_json(f'https://www.kika.de/_next-api/proxy/v1/videos/{video_id}', video_id)  # metadata JSON for this id
+        video_assets = self._download_json(doc['assets']['url'], video_id)  # second JSON, located via the metadata's assets.url
+
+        subtitles = {}  # every track found below is filed under 'de'
+        if ttml_resource := url_or_none(video_assets.get('videoSubtitle')):  # TTML track, only when the value is a valid URL
+            subtitles['de'] = [{
+                'url': ttml_resource,
+                'ext': 'ttml',
+            }]
+        if webvtt_resource := url_or_none(video_assets.get('webvttUrl')):  # WebVTT track, listed after TTML when both exist
+            subtitles.setdefault('de', []).append({
+                'url': webvtt_resource,
+                'ext': 'vtt',
+            })
+
+        return {
+            'id': video_id,
+            'formats': list(self._extract_formats(video_assets, video_id)),  # drains the generator defined below
+            'subtitles': subtitles,
+            **traverse_obj(doc, {
+                'title': ('title', {str}),
+                'description': ('description', {str}),
+                'timestamp': ('date', {parse_iso8601}),
+                'modified_timestamp': ('modificationDate', {parse_iso8601}),
+                'duration': ((  # two candidate sources; presumably the first that yields a value wins - confirm against traverse_obj docs
+                    ('durationInSeconds', {int_or_none}),
+                    ('duration', {parse_duration})), any),
+                'episode_number': ('episodeNumber', {int_or_none}),
+                'season_number': ('season', {int_or_none}),
+            }),
+        }
+
+    def _extract_formats(self, media_info, video_id):  # generator yielding format dicts
+        for media in traverse_obj(media_info, ('assets', lambda _, v: url_or_none(v['url']))):  # keeps only assets whose 'url' passes url_or_none
+            stream_url = media['url']
+            ext = determine_ext(stream_url)
+            if ext == 'm3u8':  # HLS playlist: delegate to the m3u8 helper, non-fatal on failure
+                yield from self._extract_m3u8_formats(
+                    stream_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
+            else:  # any other extension is emitted as a single direct-URL format
+                yield {
+                    'url': stream_url,
+                    'format_id': ext,  # the detected extension doubles as the format id
+                    **traverse_obj(media, {
+                        'width': ('frameWidth', {int_or_none}),
+                        'height': ('frameHeight', {int_or_none}),
+                        # NB: filesize is 0 if unknown, bitrate is -1 if unknown
+                        'filesize': ('fileSize', {int_or_none}, {lambda x: x or None}),
+                        'abr': ('bitrateAudio', {int_or_none}, {lambda x: None if x == -1 else x}),
+                        'vbr': ('bitrateVideo', {int_or_none}, {lambda x: None if x == -1 else x}),
+                    }),
+                }