diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 9b8207b28b83..4b69642603c7 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -707,3 +707,9 @@ Sakura286 SamDecrock stratus-ss subrat-lima +gitninja1234 +jkruse +xiaomac +wesson09 +Crypto90 +MutantPiggieGolem1 diff --git a/Changelog.md b/Changelog.md index 41a2da744d43..75e8240335d9 100644 --- a/Changelog.md +++ b/Changelog.md @@ -4,11 +4,74 @@ # To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master --> +### 2024.12.13 + +#### Extractor changes +- **patreon**: campaign: [Support /c/ URLs](https://github.com/yt-dlp/yt-dlp/commit/bc262bcad4d3683ceadf61a7eb87e233e72adef3) ([#11756](https://github.com/yt-dlp/yt-dlp/issues/11756)) by [bashonly](https://github.com/bashonly) +- **soundcloud**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/f4d3e9e6dc25077b79849a31a2f67f93fdc01e62) ([#11777](https://github.com/yt-dlp/yt-dlp/issues/11777)) by [bashonly](https://github.com/bashonly) +- **youtube** + - [Fix `release_date` extraction](https://github.com/yt-dlp/yt-dlp/commit/d5e2a379f2adcb28bc48c7d9e90716d7278f89d2) ([#11759](https://github.com/yt-dlp/yt-dlp/issues/11759)) by [MutantPiggieGolem1](https://github.com/MutantPiggieGolem1) + - [Fix signature function extraction for `2f1832d2`](https://github.com/yt-dlp/yt-dlp/commit/5460cd91891bf613a2065e2fc278d9903c37a127) ([#11801](https://github.com/yt-dlp/yt-dlp/issues/11801)) by [bashonly](https://github.com/bashonly) + - [Prioritize original language over auto-dubbed audio](https://github.com/yt-dlp/yt-dlp/commit/dc3c4fddcc653989dae71fc563d82a308fc898cc) ([#11803](https://github.com/yt-dlp/yt-dlp/issues/11803)) by [bashonly](https://github.com/bashonly) + - search_url: [Fix playlist searches](https://github.com/yt-dlp/yt-dlp/commit/f6c73aad5f1a67544bea137ebd9d1e22e0e56567) ([#11782](https://github.com/yt-dlp/yt-dlp/issues/11782)) by [Crypto90](https://github.com/Crypto90) + +#### Misc. 
changes +- **cleanup**: [Make more playlist entries lazy](https://github.com/yt-dlp/yt-dlp/commit/54216696261bc07cacd9a837c501d9e0b7fed09e) ([#11763](https://github.com/yt-dlp/yt-dlp/issues/11763)) by [seproDev](https://github.com/seproDev) + +### 2024.12.06 + +#### Core changes +- **cookies**: [Add `--cookies-from-browser` support for MS Store Firefox](https://github.com/yt-dlp/yt-dlp/commit/354cb4026cf2191e1a130ec2a627b95cabfbc60a) ([#11731](https://github.com/yt-dlp/yt-dlp/issues/11731)) by [wesson09](https://github.com/wesson09) + +#### Extractor changes +- **bilibili**: [Fix HD formats extraction](https://github.com/yt-dlp/yt-dlp/commit/fca3eb5f8be08d5fab2e18b45b7281a12e566725) ([#11734](https://github.com/yt-dlp/yt-dlp/issues/11734)) by [grqz](https://github.com/grqz) +- **soundcloud**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/2feb28028ee48f2185d2d95076e62accb09b9e2e) ([#11742](https://github.com/yt-dlp/yt-dlp/issues/11742)) by [bashonly](https://github.com/bashonly) +- **youtube** + - [Fix `n` sig extraction for player `3bb1f723`](https://github.com/yt-dlp/yt-dlp/commit/a95ee6d8803fca9157adecf63732ab58bf87fd88) ([#11750](https://github.com/yt-dlp/yt-dlp/issues/11750)) by [bashonly](https://github.com/bashonly) (With fixes in [4bd2655](https://github.com/yt-dlp/yt-dlp/commit/4bd2655398aed450456197a6767639114a24eac2)) + - [Fix signature function extraction](https://github.com/yt-dlp/yt-dlp/commit/4c85ccd1366c88cf93982f8350f58eed17355981) ([#11751](https://github.com/yt-dlp/yt-dlp/issues/11751)) by [bashonly](https://github.com/bashonly) + - [Player client maintenance](https://github.com/yt-dlp/yt-dlp/commit/2e49c789d3eebc39af8910705d65a98bca0e4c4f) ([#11724](https://github.com/yt-dlp/yt-dlp/issues/11724)) by [bashonly](https://github.com/bashonly) + +### 2024.12.03 + +#### Core changes +- [Add `playlist_webpage_url` field](https://github.com/yt-dlp/yt-dlp/commit/7d6c259a03bc4707a319e5e8c6eff0278707874b) ([#11613](https://github.com/yt-dlp/yt-dlp/issues/11613)) by [seproDev](https://github.com/seproDev) + +#### Extractor changes +- [Handle fragmented formats in `_remove_duplicate_formats`](https://github.com/yt-dlp/yt-dlp/commit/e0500cbf796323551bbabe5b8ed8c75a511ba47a) ([#11637](https://github.com/yt-dlp/yt-dlp/issues/11637)) by [Grub4K](https://github.com/Grub4K) +- **bilibili** + - [Always try to extract HD formats](https://github.com/yt-dlp/yt-dlp/commit/dc1687648077c5bf64863b307ecc5ab7e029bd8d) ([#10559](https://github.com/yt-dlp/yt-dlp/issues/10559)) by [grqz](https://github.com/grqz) + - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/239f5f36fe04603bec59c8b975f6a792f10246db) ([#11667](https://github.com/yt-dlp/yt-dlp/issues/11667)) by [grqz](https://github.com/grqz) (With fixes in [f05a1cd](https://github.com/yt-dlp/yt-dlp/commit/f05a1cd1492fc98dc8d80d2081d632a1879913d2) by [bashonly](https://github.com/bashonly), [grqz](https://github.com/grqz)) + - [Fix subtitles and chapters extraction](https://github.com/yt-dlp/yt-dlp/commit/a13a336aa6f906812701abec8101b73b73db8ff7) ([#11708](https://github.com/yt-dlp/yt-dlp/issues/11708)) by [xiaomac](https://github.com/xiaomac) +- **chaturbate**: [Fix support for non-public streams](https://github.com/yt-dlp/yt-dlp/commit/4b5eec0aaa7c02627f27a386591b735b90e681a8) ([#11624](https://github.com/yt-dlp/yt-dlp/issues/11624)) by [jkruse](https://github.com/jkruse) +- **dacast**: [Fix HLS AES formats extraction](https://github.com/yt-dlp/yt-dlp/commit/0a0d80800b9350d1a4c4b18d82cfb77ffbc3c507) 
([#11644](https://github.com/yt-dlp/yt-dlp/issues/11644)) by [bashonly](https://github.com/bashonly) +- **dropbox**: [Fix password-protected video extraction](https://github.com/yt-dlp/yt-dlp/commit/00dcde728635633eee969ad4d498b9f233c4a94e) ([#11636](https://github.com/yt-dlp/yt-dlp/issues/11636)) by [bashonly](https://github.com/bashonly) +- **duoplay**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/62cba8a1bedbfc0ddde7267ae57b72bf5f7ea7b1) ([#11588](https://github.com/yt-dlp/yt-dlp/issues/11588)) by [bashonly](https://github.com/bashonly), [glensc](https://github.com/glensc) +- **facebook**: [Support more groups URLs](https://github.com/yt-dlp/yt-dlp/commit/e0f1ae813b36e783e2348ba2a1566e12f5cd8f6e) ([#11576](https://github.com/yt-dlp/yt-dlp/issues/11576)) by [grqz](https://github.com/grqz) +- **instagram**: [Support `share` URLs](https://github.com/yt-dlp/yt-dlp/commit/360aed810ad85db950df586282d256516c98cd2d) ([#11677](https://github.com/yt-dlp/yt-dlp/issues/11677)) by [grqz](https://github.com/grqz) +- **microsoftembed**: [Make format extraction non fatal](https://github.com/yt-dlp/yt-dlp/commit/2bea7936323ca4b6f3b9b1fdd892566223e30efa) ([#11654](https://github.com/yt-dlp/yt-dlp/issues/11654)) by [seproDev](https://github.com/seproDev) +- **mitele**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/cd0f934604587ed793e9177f6a127e5dcf99a7dd) ([#11683](https://github.com/yt-dlp/yt-dlp/issues/11683)) by [DarkZeros](https://github.com/DarkZeros) +- **stripchat**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/16336c51d0848a6868a4fa04e749fa03548b4913) ([#11596](https://github.com/yt-dlp/yt-dlp/issues/11596)) by [gitninja1234](https://github.com/gitninja1234) +- **tiktok**: [Deprioritize animated thumbnails](https://github.com/yt-dlp/yt-dlp/commit/910ecc422930bca14e2abe4986f5f92359e3cea8) ([#11645](https://github.com/yt-dlp/yt-dlp/issues/11645)) by [bashonly](https://github.com/bashonly) +- **vk**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/c038a7b187ba24360f14134842a7a2cf897c33b1) ([#11715](https://github.com/yt-dlp/yt-dlp/issues/11715)) by [bashonly](https://github.com/bashonly) +- **youtube** + - [Adjust player clients for site changes](https://github.com/yt-dlp/yt-dlp/commit/0d146c1e36f467af30e87b7af651bdee67b73500) ([#11663](https://github.com/yt-dlp/yt-dlp/issues/11663)) by [bashonly](https://github.com/bashonly) + - tab: [Fix playlists tab extraction](https://github.com/yt-dlp/yt-dlp/commit/fe70f20aedf528fdee332131bc9b6710e54e6f10) ([#11615](https://github.com/yt-dlp/yt-dlp/issues/11615)) by [seproDev](https://github.com/seproDev) + +#### Networking changes +- **Request Handler**: websockets: [Support websockets 14.0+](https://github.com/yt-dlp/yt-dlp/commit/c7316373c0a886f65a07a51e50ee147bb3294c85) ([#11616](https://github.com/yt-dlp/yt-dlp/issues/11616)) by [coletdjnz](https://github.com/coletdjnz) + +#### Misc. 
changes +- **cleanup** + - [Bump ruff to 0.8.x](https://github.com/yt-dlp/yt-dlp/commit/d8fb3490863653182864d2a53522f350d67a9ff8) ([#11608](https://github.com/yt-dlp/yt-dlp/issues/11608)) by [seproDev](https://github.com/seproDev) + - Miscellaneous + - [ccf0a6b](https://github.com/yt-dlp/yt-dlp/commit/ccf0a6b86b7f68a75463804fe485ec240b8635f0) by [bashonly](https://github.com/bashonly), [pzhlkj6612](https://github.com/pzhlkj6612) + - [2b67ac3](https://github.com/yt-dlp/yt-dlp/commit/2b67ac300ac8b44368fb121637d1743cea8c5b6b) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev) + ### 2024.11.18 #### Important changes - **Login with OAuth is no longer supported for YouTube** -Due to a change made by the site, yt-dlp is longer able to support OAuth login for YouTube. [Read more](https://github.com/yt-dlp/yt-dlp/issues/11462#issuecomment-2471703090) +Due to a change made by the site, yt-dlp is no longer able to support OAuth login for YouTube. [Read more](https://github.com/yt-dlp/yt-dlp/issues/11462#issuecomment-2471703090) #### Core changes - [Catch broken Cryptodome installations](https://github.com/yt-dlp/yt-dlp/commit/b83ca24eb72e1e558b0185bd73975586c0bc0546) ([#11486](https://github.com/yt-dlp/yt-dlp/issues/11486)) by [seproDev](https://github.com/seproDev) diff --git a/README.md b/README.md index 6dadb56181db..0ebb0320505f 100644 --- a/README.md +++ b/README.md @@ -1294,6 +1294,7 @@ The available fields are: - `playlist_uploader_id` (string): Nickname or id of the playlist uploader - `playlist_channel` (string): Display name of the channel that uploaded the playlist - `playlist_channel_id` (string): Identifier of the channel that uploaded the playlist + - `playlist_webpage_url` (string): URL of the playlist webpage - `webpage_url` (string): A URL to the video webpage which, if given to yt-dlp, should yield the same result again - `webpage_url_basename` (string): The basename of the webpage URL - `webpage_url_domain` (string): The domain of the webpage URL @@ -1760,7 +1761,7 @@ $ yt-dlp --replace-in-metadata "title,uploader" "[ _]" "-" # EXTRACTOR ARGUMENTS -Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. E.g. `--extractor-args "youtube:player-client=mediaconnect,web;formats=incomplete" --extractor-args "funimation:version=uncut"` +Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. E.g. `--extractor-args "youtube:player-client=tv,mweb;formats=incomplete" --extractor-args "funimation:version=uncut"` Note: In CLI, `ARG` can use `-` instead of `_`; e.g. `youtube:player-client"` becomes `youtube:player_client"` @@ -1769,7 +1770,7 @@ The following extractors use this feature: #### youtube * `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. 
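When embedding yt-dlp rather than invoking the CLI, the same extractor arguments are passed through the `extractor_args` option of `YoutubeDL`; a minimal sketch, assuming the `yt_dlp` package is installed (the video URL is only a placeholder):

```python
# Programmatic equivalent of:
#   --extractor-args "youtube:player-client=tv,mweb;formats=incomplete"
# Argument values are always lists of strings; keys use underscores.
import yt_dlp

ydl_opts = {
    'extractor_args': {
        'youtube': {
            'player_client': ['tv', 'mweb'],
            'formats': ['incomplete'],
        },
    },
}
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    info = ydl.extract_info('https://www.youtube.com/watch?v=PLACEHOLDER', download=False)
    print(info.get('title'))
```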
See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes * `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively -* `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music` and `_creator` (e.g. `ios_creator`); and `mweb`, `mediaconnect`, `android_vr`, `web_safari`, `web_embedded`, `tv` and `tv_embedded` with no variants. By default, `ios,mweb` is used, and `web_creator` is added as needed for age-gated videos when account age verification is required. Similarly, the `_music` variants are added for `music.youtube.com` URLs. Some clients, such as `web` and `android`, require a `po_token` for their formats to be downloadable. Some clients, such as the `_creator` variants, will only work with authentication. You can use `all` to use all the clients, and `default` for the default clients. You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=all,-web` +* `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music` and `_creator` (e.g. `ios_creator`); and `mweb`, `android_vr`, `web_safari`, `web_embedded`, `tv` and `tv_embedded` with no variants. By default, `ios,mweb` is used, or `web_creator,mweb` is used when authenticating with cookies. The `_music` variants are added for `music.youtube.com` URLs. Some clients, such as `web` and `android`, require a `po_token` for their formats to be downloadable. Some clients, such as the `_creator` variants, will only work with authentication. Not all clients support authentication via cookies. You can use `all` to use all the clients, and `default` for the default clients. You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=all,-web` * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details * `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp. * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side) @@ -1859,7 +1860,7 @@ The following extractors use this feature: * `cdn`: One or more CDN IDs to use with the API call for stream URLs, e.g. `gcp_cdn`, `gs_cdn_pc_app`, `gs_cdn_mobile_web`, `gs_cdn_pc_web` #### soundcloud -* `formats`: Formats to request from the API. Requested values should be in the format of `{protocol}_{extension}` (omitting the bitrate), e.g. `hls_opus,http_aac`. The `*` character functions as a wildcard, e.g. `*_mp3`, and can be passed by itself to request all formats. Known protocols include `http`, `hls` and `hls-aes`; known extensions include `aac`, `opus` and `mp3`. Original `download` formats are always extracted. Default is `http_aac,hls_aac,http_opus,hls_opus,http_mp3,hls_mp3` +* `formats`: Formats to request from the API. Requested values should be in the format of `{protocol}_{codec}`, e.g. `hls_opus,http_aac`. 
The `*` character functions as a wildcard, e.g. `*_mp3`, and can be passed by itself to request all formats. Known protocols include `http`, `hls` and `hls-aes`; known codecs include `aac`, `opus` and `mp3`. Original `download` formats are always extracted. Default is `http_aac,hls_aac,http_opus,hls_opus,http_mp3,hls_mp3` #### orfon (orf:on) * `prefer_segments_playlist`: Prefer a playlist of program segments instead of a single complete video when available. If individual segments are desired, use `--concat-playlist never --extractor-args "orfon:prefer_segments_playlist"` diff --git a/devscripts/changelog_override.json b/devscripts/changelog_override.json index 906e5cf728fe..079e2f7296da 100644 --- a/devscripts/changelog_override.json +++ b/devscripts/changelog_override.json @@ -238,6 +238,6 @@ { "action": "add", "when": "52c0ffe40ad6e8404d93296f575007b05b04c686", - "short": "[priority] **Login with OAuth is no longer supported for YouTube**\nDue to a change made by the site, yt-dlp is longer able to support OAuth login for YouTube. [Read more](https://github.com/yt-dlp/yt-dlp/issues/11462#issuecomment-2471703090)" + "short": "[priority] **Login with OAuth is no longer supported for YouTube**\nDue to a change made by the site, yt-dlp is no longer able to support OAuth login for YouTube. [Read more](https://github.com/yt-dlp/yt-dlp/issues/11462#issuecomment-2471703090)" } ] diff --git a/pyproject.toml b/pyproject.toml index 92d399e3195b..96e2d669a435 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,7 +52,7 @@ default = [ "pycryptodomex", "requests>=2.32.2,<3", "urllib3>=1.26.17,<3", - "websockets>=13.0,<14", + "websockets>=13.0", ] curl-cffi = [ "curl-cffi==0.5.10; os_name=='nt' and implementation_name=='cpython'", @@ -76,7 +76,7 @@ dev = [ ] static-analysis = [ "autopep8~=2.0", - "ruff~=0.7.0", + "ruff~=0.8.0", ] test = [ "pytest~=8.1", @@ -186,6 +186,7 @@ ignore = [ "E501", # line-too-long "E731", # lambda-assignment "E741", # ambiguous-variable-name + "UP031", # printf-string-formatting "UP036", # outdated-version-block "B006", # mutable-argument-default "B008", # function-call-in-default-argument @@ -258,9 +259,6 @@ select = [ "A002", # builtin-argument-shadowing "C408", # unnecessary-collection-call ] -"yt_dlp/jsinterp.py" = [ - "UP031", # printf-string-formatting -] [tool.ruff.lint.isort] known-first-party = [ diff --git a/test/test_socks.py b/test/test_socks.py index 68af19d0ca44..f601fc8a5e47 100644 --- a/test/test_socks.py +++ b/test/test_socks.py @@ -216,7 +216,9 @@ def handle(self): protocol = websockets.ServerProtocol() connection = websockets.sync.server.ServerConnection(socket=self.request, protocol=protocol, close_timeout=0) connection.handshake() - connection.send(json.dumps(self.socks_info)) + for message in connection: + if message == 'socks_info': + connection.send(json.dumps(self.socks_info)) connection.close() diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 0f7ae34f44f5..13436f088451 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -68,6 +68,16 @@ '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', 'AOq0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL2QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0', ), + ( + 'https://www.youtube.com/s/player/3bb1f723/player_ias.vflset/en_US/base.js', + '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', + 
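+        # expected descrambled signature for the scrambled input above, per this player's sig function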
'MyOSJXtKI3m-uME_jv7-pT12gOFC02RFkGoqWpzE0Cs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', + ), + ( + 'https://www.youtube.com/s/player/2f1832d2/player_ias.vflset/en_US/base.js', + '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', + '0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xxAj7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJ2OySqa0q', + ), ] _NSIG_TESTS = [ @@ -183,6 +193,14 @@ 'https://www.youtube.com/s/player/b12cc44b/player_ias.vflset/en_US/base.js', 'keLa5R2U00sR9SQK', 'N1OGyujjEwMnLw', ), + ( + 'https://www.youtube.com/s/player/3bb1f723/player_ias.vflset/en_US/base.js', + 'gK15nzVyaXE9RsMP3z', 'ZFFWFLPWx9DEgQ', + ), + ( + 'https://www.youtube.com/s/player/2f1832d2/player_ias.vflset/en_US/base.js', + 'YWt1qdbe8SAfkoPHW5d', 'RrRjWQOJmBiP', + ), ] @@ -254,8 +272,11 @@ def signature(jscode, sig_input): def n_sig(jscode, sig_input): - funcname = YoutubeIE(FakeYDL())._extract_n_function_name(jscode) - return JSInterpreter(jscode).call_function(funcname, sig_input) + ie = YoutubeIE(FakeYDL()) + funcname = ie._extract_n_function_name(jscode) + jsi = JSInterpreter(jscode) + func = jsi.extract_function_from_code(*ie._fixup_n_function_code(*jsi.extract_function_code(funcname))) + return func([sig_input]) make_sig_test = t_factory( diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index b1cedb7fc6b9..4158e6f780e9 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1116,7 +1116,7 @@ def report_file_delete(self, file_name): def raise_no_formats(self, info, forced=False, *, msg=None): has_drm = info.get('_has_drm') ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg) - msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!' + msg = msg or (has_drm and 'This video is DRM protected') or 'No video formats found!' if forced or not ignored: raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'], expected=has_drm or ignored or expected) @@ -1947,6 +1947,7 @@ def _playlist_infodict(ie_result, strict=False, **kwargs): 'playlist_uploader_id': ie_result.get('uploader_id'), 'playlist_channel': ie_result.get('channel'), 'playlist_channel_id': ie_result.get('channel_id'), + 'playlist_webpage_url': ie_result.get('webpage_url'), **kwargs, } if strict: @@ -2195,7 +2196,7 @@ def _select_formats(self, formats, selector): def _default_format_spec(self, info_dict): prefer_best = ( self.params['outtmpl']['default'] == '-' - or info_dict.get('is_live') and not self.params.get('live_from_start')) + or (info_dict.get('is_live') and not self.params.get('live_from_start'))) def can_merge(): merger = FFmpegMergerPP(self) @@ -2364,7 +2365,7 @@ def _merge(formats_pair): vexts=[f['ext'] for f in video_fmts], aexts=[f['ext'] for f in audio_fmts], preferences=(try_call(lambda: self.params['merge_output_format'].split('/')) - or self.params.get('prefer_free_formats') and ('webm', 'mkv'))) + or (self.params.get('prefer_free_formats') and ('webm', 'mkv')))) filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info)) @@ -3540,8 +3541,8 @@ def ffmpeg_fixup(cndn, msg, cls): and info_dict.get('container') == 'm4a_dash', 'writing DASH m4a. 
Only some players support this container', FFmpegFixupM4aPP) - ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts') - or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None, + ffmpeg_fixup((downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')) + or (info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None), 'Possible MPEG-TS in MP4 container or malformed AAC timestamps', FFmpegFixupM3u8PP) ffmpeg_fixup(downloader == 'dashsegments' diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index a1880bf7dc29..20111175b1c3 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -1062,7 +1062,7 @@ def make_row(target, handler): # If we only have a single process attached, then the executable was double clicked # When using `pyinstaller` with `--onefile`, two processes get attached is_onefile = hasattr(sys, '_MEIPASS') and os.path.basename(sys._MEIPASS).startswith('_MEI') - if attached_processes == 1 or is_onefile and attached_processes == 2: + if attached_processes == 1 or (is_onefile and attached_processes == 2): print(parser._generate_error_message( 'Do not double-click the executable, instead call it from a command line.\n' 'Please read the README for further information on how to use yt-dlp: ' @@ -1109,9 +1109,9 @@ def main(argv=None): from .extractor import gen_extractors, list_extractors __all__ = [ - 'main', 'YoutubeDL', - 'parse_options', 'gen_extractors', 'list_extractors', + 'main', + 'parse_options', ] diff --git a/yt_dlp/aes.py b/yt_dlp/aes.py index 0930d36df994..9908434a5894 100644 --- a/yt_dlp/aes.py +++ b/yt_dlp/aes.py @@ -534,19 +534,17 @@ def ghash(subkey, data): __all__ = [ 'aes_cbc_decrypt', 'aes_cbc_decrypt_bytes', - 'aes_ctr_decrypt', - 'aes_decrypt_text', - 'aes_decrypt', - 'aes_ecb_decrypt', - 'aes_gcm_decrypt_and_verify', - 'aes_gcm_decrypt_and_verify_bytes', - 'aes_cbc_encrypt', 'aes_cbc_encrypt_bytes', + 'aes_ctr_decrypt', 'aes_ctr_encrypt', + 'aes_decrypt', + 'aes_decrypt_text', + 'aes_ecb_decrypt', 'aes_ecb_encrypt', 'aes_encrypt', - + 'aes_gcm_decrypt_and_verify', + 'aes_gcm_decrypt_and_verify_bytes', 'key_expansion', 'pad_block', 'pkcs7_padding', diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py index d5b0d3991b44..fad323c9015f 100644 --- a/yt_dlp/cookies.py +++ b/yt_dlp/cookies.py @@ -195,7 +195,10 @@ def _extract_firefox_cookies(profile, container, logger): def _firefox_browser_dirs(): if sys.platform in ('cygwin', 'win32'): - yield os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles') + yield from map(os.path.expandvars, ( + R'%APPDATA%\Mozilla\Firefox\Profiles', + R'%LOCALAPPDATA%\Packages\Mozilla.Firefox_n80bbvh6b1yt2\LocalCache\Roaming\Mozilla\Firefox\Profiles', + )) elif sys.platform == 'darwin': yield os.path.expanduser('~/Library/Application Support/Firefox/Profiles') @@ -1276,8 +1279,8 @@ def open(self, file, *, write=False): def _really_save(self, f, ignore_discard, ignore_expires): now = time.time() for cookie in self: - if (not ignore_discard and cookie.discard - or not ignore_expires and cookie.is_expired(now)): + if ((not ignore_discard and cookie.discard) + or (not ignore_expires and cookie.is_expired(now))): continue name, value = cookie.name, cookie.value if value is None: diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py index 0a00d5dabbcd..da2574da7237 100644 --- a/yt_dlp/downloader/hls.py +++ b/yt_dlp/downloader/hls.py @@ -119,12 +119,12 @@ def real_download(self, filename, info_dict): self.to_screen(f'[{self.FD_NAME}] Fragment downloads will be 
delegated to {real_downloader.get_basename()}') def is_ad_fragment_start(s): - return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s - or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad')) + return ((s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s) + or (s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))) def is_ad_fragment_end(s): - return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s - or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment')) + return ((s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s) + or (s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment'))) fragments = [] diff --git a/yt_dlp/downloader/youtube_live_chat.py b/yt_dlp/downloader/youtube_live_chat.py index 961938d4491f..ddd912ca2b66 100644 --- a/yt_dlp/downloader/youtube_live_chat.py +++ b/yt_dlp/downloader/youtube_live_chat.py @@ -123,8 +123,8 @@ def download_and_parse_fragment(url, frag_index, request_data=None, headers=None data, lambda x: x['continuationContents']['liveChatContinuation'], dict) or {} - func = (info_dict['protocol'] == 'youtube_live_chat' and parse_actions_live - or frag_index == 1 and try_refresh_replay_beginning + func = ((info_dict['protocol'] == 'youtube_live_chat' and parse_actions_live) + or (frag_index == 1 and try_refresh_replay_beginning) or parse_actions_replay) return (True, *func(live_chat_continuation)) except HTTPError as err: diff --git a/yt_dlp/extractor/adn.py b/yt_dlp/extractor/adn.py index 919e1d6af514..7dff40556bae 100644 --- a/yt_dlp/extractor/adn.py +++ b/yt_dlp/extractor/adn.py @@ -232,7 +232,7 @@ def _real_extract(self, url): error = self._parse_json(e.cause.response.read(), video_id) message = error.get('message') - if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country': + if e.cause.status == 403 and error.get('code') == 'player-bad-geolocation-country': self.raise_geo_restricted(msg=message) raise ExtractorError(message) else: diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 02ea67707fcd..2db951a6084d 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -18,7 +18,6 @@ InAdvancePagedList, OnDemandPagedList, bool_or_none, - clean_html, determine_ext, filter_dict, float_or_none, @@ -63,7 +62,7 @@ def _check_missing_formats(self, play_info, formats): 'support_formats', lambda _, v: v['quality'] not in parsed_qualities))], delim=', ') if missing_formats: self.to_screen( - f'Format(s) {missing_formats} are missing; you have to login or ' + f'Format(s) {missing_formats} are missing; you have to ' f'become a premium member to download them. 
{self._login_hint()}') def extract_formats(self, play_info): @@ -165,14 +164,18 @@ def _sign_wbi(self, params, video_id): params['w_rid'] = hashlib.md5(f'{query}{self._get_wbi_key(video_id)}'.encode()).hexdigest() return params - def _download_playinfo(self, bvid, cid, headers=None, qn=None): - params = {'bvid': bvid, 'cid': cid, 'fnval': 4048} - if qn: - params['qn'] = qn + def _download_playinfo(self, bvid, cid, headers=None, query=None): + params = {'bvid': bvid, 'cid': cid, 'fnval': 4048, **(query or {})} + if self.is_logged_in: + params.pop('try_look', None) + if qn := params.get('qn'): + note = f'Downloading video format {qn} for cid {cid}' + else: + note = f'Downloading video formats for cid {cid}' + return self._download_json( 'https://api.bilibili.com/x/player/wbi/playurl', bvid, - query=self._sign_wbi(params, bvid), headers=headers, - note=f'Downloading video formats for cid {cid} {qn or ""}')['data'] + query=self._sign_wbi(params, bvid), headers=headers, note=note)['data'] def json2srt(self, json_data): srt_data = '' @@ -191,7 +194,7 @@ def _get_subtitles(self, video_id, cid, aid=None): } video_info = self._download_json( - 'https://api.bilibili.com/x/player/v2', video_id, + 'https://api.bilibili.com/x/player/wbi/v2', video_id, query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid}, note=f'Extracting subtitle info {cid}', headers=self._HEADERS) if traverse_obj(video_info, ('data', 'need_login_subtitle')): @@ -207,7 +210,7 @@ def _get_subtitles(self, video_id, cid, aid=None): def _get_chapters(self, aid, cid): chapters = aid and cid and self._download_json( - 'https://api.bilibili.com/x/player/v2', aid, query={'aid': aid, 'cid': cid}, + 'https://api.bilibili.com/x/player/wbi/v2', aid, query={'aid': aid, 'cid': cid}, note='Extracting chapters', fatal=False, headers=self._HEADERS) return traverse_obj(chapters, ('data', 'view_points', ..., { 'title': 'content', @@ -286,7 +289,7 @@ def _get_interactive_entries(self, video_id, cid, metainfo, headers=None): ('data', 'interaction', 'graph_version', {int_or_none})) cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1) for cid, edges in cid_edges.items(): - play_info = self._download_playinfo(video_id, cid, headers=headers) + play_info = self._download_playinfo(video_id, cid, headers=headers, query={'try_look': 1}) yield { **metainfo, 'id': f'{video_id}_{cid}', @@ -639,40 +642,29 @@ def _real_extract(self, url): headers['Referer'] = url initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id) + + if traverse_obj(initial_state, ('error', 'trueCode')) == -403: + self.raise_login_required() + if traverse_obj(initial_state, ('error', 'trueCode')) == -404: + raise ExtractorError( + 'This video may be deleted or geo-restricted. ' + 'You might want to try a VPN or a proxy server (with --proxy)', expected=True) + is_festival = 'videoData' not in initial_state if is_festival: video_data = initial_state['videoInfo'] else: - play_info_obj = self._search_json( - r'window\.__playinfo__\s*=', webpage, 'play info', video_id, fatal=False) - if not play_info_obj: - if traverse_obj(initial_state, ('error', 'trueCode')) == -403: - self.raise_login_required() - if traverse_obj(initial_state, ('error', 'trueCode')) == -404: - raise ExtractorError( - 'This video may be deleted or geo-restricted. 
' - 'You might want to try a VPN or a proxy server (with --proxy)', expected=True) - play_info = traverse_obj(play_info_obj, ('data', {dict})) - if not play_info: - if traverse_obj(play_info_obj, 'code') == 87007: - toast = get_element_by_class('tips-toast', webpage) or '' - msg = clean_html( - f'{get_element_by_class("belongs-to", toast) or ""},' - + (get_element_by_class('level', toast) or '')) - raise ExtractorError( - f'This is a supporter-only video: {msg}. {self._login_hint()}', expected=True) - raise ExtractorError('Failed to extract play info') video_data = initial_state['videoData'] video_id, title = video_data['bvid'], video_data.get('title') # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself. - page_list_json = not is_festival and traverse_obj( + page_list_json = (not is_festival and traverse_obj( self._download_json( 'https://api.bilibili.com/x/player/pagelist', video_id, fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'}, note='Extracting videos in anthology', headers=headers), - 'data', expected_type=list) or [] + 'data', expected_type=list)) or [] is_anthology = len(page_list_json) > 1 part_id = int_or_none(parse_qs(url).get('p', [None])[-1]) @@ -691,8 +683,6 @@ def _real_extract(self, url): festival_info = {} if is_festival: - play_info = self._download_playinfo(video_id, cid, headers=headers) - festival_info = traverse_obj(initial_state, { 'uploader': ('videoInfo', 'upName'), 'uploader_id': ('videoInfo', 'upMid', {str_or_none}), @@ -727,62 +717,79 @@ def _real_extract(self, url): self._get_interactive_entries(video_id, cid, metainfo, headers=headers), **metainfo, duration=traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})), __post_extractor=self.extract_comments(aid)) - else: - formats = self.extract_formats(play_info) - - if not traverse_obj(play_info, ('dash')): - # we only have legacy formats and need additional work - has_qn = lambda x: x in traverse_obj(formats, (..., 'quality')) - for qn in traverse_obj(play_info, ('accept_quality', lambda _, v: not has_qn(v), {int})): - formats.extend(traverse_obj( - self.extract_formats(self._download_playinfo(video_id, cid, headers=headers, qn=qn)), - lambda _, v: not has_qn(v['quality']))) - self._check_missing_formats(play_info, formats) - flv_formats = traverse_obj(formats, lambda _, v: v['fragments']) - if flv_formats and len(flv_formats) < len(formats): - # Flv and mp4 are incompatible due to `multi_video` workaround, so drop one - if not self._configuration_arg('prefer_multi_flv'): - dropped_fmts = ', '.join( - f'{f.get("format_note")} ({f.get("format_id")})' for f in flv_formats) - formats = traverse_obj(formats, lambda _, v: not v.get('fragments')) - if dropped_fmts: - self.to_screen( - f'Dropping incompatible flv format(s) {dropped_fmts} since mp4 is available. 
' - 'To extract flv, pass --extractor-args "bilibili:prefer_multi_flv"') - else: - formats = traverse_obj( - # XXX: Filtering by extractor-arg is for testing purposes - formats, lambda _, v: v['quality'] == int(self._configuration_arg('prefer_multi_flv')[0]), - ) or [max(flv_formats, key=lambda x: x['quality'])] - - if traverse_obj(formats, (0, 'fragments')): - # We have flv formats, which are individual short videos with their own timestamps and metainfo - # Binary concatenation corrupts their timestamps, so we need a `multi_video` workaround - return { - **metainfo, - '_type': 'multi_video', - 'entries': [{ - 'id': f'{metainfo["id"]}_{idx}', - 'title': metainfo['title'], - 'http_headers': metainfo['http_headers'], - 'formats': [{ - **fragment, - 'format_id': formats[0].get('format_id'), - }], - 'subtitles': self.extract_subtitles(video_id, cid) if idx == 0 else None, - '__post_extractor': self.extract_comments(aid) if idx == 0 else None, - } for idx, fragment in enumerate(formats[0]['fragments'])], - 'duration': float_or_none(play_info.get('timelength'), scale=1000), - } - else: - return { - **metainfo, - 'formats': formats, - 'duration': float_or_none(play_info.get('timelength'), scale=1000), - 'chapters': self._get_chapters(aid, cid), - 'subtitles': self.extract_subtitles(video_id, cid), - '__post_extractor': self.extract_comments(aid), - } + + play_info = None + if self.is_logged_in: + play_info = traverse_obj( + self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id, default=None), + ('data', {dict})) + if not play_info: + play_info = self._download_playinfo(video_id, cid, headers=headers, query={'try_look': 1}) + formats = self.extract_formats(play_info) + + if video_data.get('is_upower_exclusive'): + high_level = traverse_obj(initial_state, ('elecFullInfo', 'show_info', 'high_level', {dict})) or {} + msg = f'{join_nonempty("title", "sub_title", from_dict=high_level, delim=",")}. {self._login_hint()}' + if not formats: + raise ExtractorError(f'This is a supporter-only video: {msg}', expected=True) + if '试看' in traverse_obj(play_info, ('accept_description', ..., {str})): + self.report_warning( + f'This is a supporter-only video, only the preview will be extracted: {msg}', + video_id=video_id) + + if not traverse_obj(play_info, 'dash'): + # we only have legacy formats and need additional work + has_qn = lambda x: x in traverse_obj(formats, (..., 'quality')) + for qn in traverse_obj(play_info, ('accept_quality', lambda _, v: not has_qn(v), {int})): + formats.extend(traverse_obj( + self.extract_formats(self._download_playinfo(video_id, cid, headers=headers, query={'qn': qn})), + lambda _, v: not has_qn(v['quality']))) + self._check_missing_formats(play_info, formats) + flv_formats = traverse_obj(formats, lambda _, v: v['fragments']) + if flv_formats and len(flv_formats) < len(formats): + # Flv and mp4 are incompatible due to `multi_video` workaround, so drop one + if not self._configuration_arg('prefer_multi_flv'): + dropped_fmts = ', '.join( + f'{f.get("format_note")} ({f.get("format_id")})' for f in flv_formats) + formats = traverse_obj(formats, lambda _, v: not v.get('fragments')) + if dropped_fmts: + self.to_screen( + f'Dropping incompatible flv format(s) {dropped_fmts} since mp4 is available. 
' + 'To extract flv, pass --extractor-args "bilibili:prefer_multi_flv"') + else: + formats = traverse_obj( + # XXX: Filtering by extractor-arg is for testing purposes + formats, lambda _, v: v['quality'] == int(self._configuration_arg('prefer_multi_flv')[0]), + ) or [max(flv_formats, key=lambda x: x['quality'])] + + if traverse_obj(formats, (0, 'fragments')): + # We have flv formats, which are individual short videos with their own timestamps and metainfo + # Binary concatenation corrupts their timestamps, so we need a `multi_video` workaround + return { + **metainfo, + '_type': 'multi_video', + 'entries': [{ + 'id': f'{metainfo["id"]}_{idx}', + 'title': metainfo['title'], + 'http_headers': metainfo['http_headers'], + 'formats': [{ + **fragment, + 'format_id': formats[0].get('format_id'), + }], + 'subtitles': self.extract_subtitles(video_id, cid) if idx == 0 else None, + '__post_extractor': self.extract_comments(aid) if idx == 0 else None, + } for idx, fragment in enumerate(formats[0]['fragments'])], + 'duration': float_or_none(play_info.get('timelength'), scale=1000), + } + + return { + **metainfo, + 'formats': formats, + 'duration': float_or_none(play_info.get('timelength'), scale=1000), + 'chapters': self._get_chapters(aid, cid), + 'subtitles': self.extract_subtitles(video_id, cid), + '__post_extractor': self.extract_comments(aid), + } class BiliBiliBangumiIE(BilibiliBaseIE): @@ -860,10 +867,16 @@ def _real_extract(self, url): self.raise_login_required('This video is for premium members only') headers['Referer'] = url - play_info = self._download_json( - 'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id, - 'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id}, - headers=headers) + + play_info = ( + self._search_json( + r'playurlSSRData\s*=', webpage, 'embedded page info', episode_id, + end_pattern='\n', default=None) + or self._download_json( + 'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id, + 'Extracting episode', query={'fnval': 12240, 'ep_id': episode_id}, + headers=headers)) + premium_only = play_info.get('code') == -10403 play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {} diff --git a/yt_dlp/extractor/brightcove.py b/yt_dlp/extractor/brightcove.py index 2526f25dac75..3ada1fd5deb8 100644 --- a/yt_dlp/extractor/brightcove.py +++ b/yt_dlp/extractor/brightcove.py @@ -31,6 +31,7 @@ update_url_query, url_or_none, ) +from ..utils.traversal import traverse_obj class BrightcoveLegacyIE(InfoExtractor): @@ -935,8 +936,8 @@ def extract_policy_key(): if content_type == 'playlist': return self.playlist_result( - [self._parse_brightcove_metadata(vid, vid.get('id'), headers) - for vid in json_data.get('videos', []) if vid.get('id')], + (self._parse_brightcove_metadata(vid, vid['id'], headers) + for vid in traverse_obj(json_data, ('videos', lambda _, v: v['id']))), json_data.get('id'), json_data.get('name'), json_data.get('description')) diff --git a/yt_dlp/extractor/chaturbate.py b/yt_dlp/extractor/chaturbate.py index a40b7d39c7f4..d031d3985e33 100644 --- a/yt_dlp/extractor/chaturbate.py +++ b/yt_dlp/extractor/chaturbate.py @@ -59,17 +59,16 @@ def _extract_from_api(self, video_id, tld): 'Accept': 'application/json', }, fatal=False, impersonate=True) or {} - status = response.get('room_status') - if status != 'public': + m3u8_url = response.get('url') + if not m3u8_url: + status = response.get('room_status') if error := self._ERROR_MAP.get(status): raise ExtractorError(error, expected=True) - self.report_warning('Falling back to 
webpage extraction') + if status == 'public': + self.raise_geo_restricted() + self.report_warning(f'Got status "{status}" from API; falling back to webpage extraction') return None - m3u8_url = response.get('url') - if not m3u8_url: - self.raise_geo_restricted() - return { 'id': video_id, 'title': video_id, diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 28a3adf9361f..92ddad2b76ab 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1854,12 +1854,26 @@ def _check_formats(self, formats, video_id): @staticmethod def _remove_duplicate_formats(formats): - format_urls = set() + seen_urls = set() + seen_fragment_urls = set() unique_formats = [] for f in formats: - if f['url'] not in format_urls: - format_urls.add(f['url']) + fragments = f.get('fragments') + if callable(fragments): unique_formats.append(f) + + elif fragments: + fragment_urls = frozenset( + fragment.get('url') or urljoin(f['fragment_base_url'], fragment['path']) + for fragment in fragments) + if fragment_urls not in seen_fragment_urls: + seen_fragment_urls.add(fragment_urls) + unique_formats.append(f) + + elif f['url'] not in seen_urls: + seen_urls.add(f['url']) + unique_formats.append(f) + formats[:] = unique_formats def _is_valid_url(self, url, video_id, item='video', headers={}): @@ -3789,7 +3803,7 @@ def _cookies_passed(self): def mark_watched(self, *args, **kwargs): if not self.get_param('mark_watched', False): return - if self.supports_login() and self._get_login_info()[0] is not None or self._cookies_passed: + if (self.supports_login() and self._get_login_info()[0] is not None) or self._cookies_passed: self._mark_watched(*args, **kwargs) def _mark_watched(self, *args, **kwargs): diff --git a/yt_dlp/extractor/cultureunplugged.py b/yt_dlp/extractor/cultureunplugged.py index 8e6579c3552a..c7ccd2747951 100644 --- a/yt_dlp/extractor/cultureunplugged.py +++ b/yt_dlp/extractor/cultureunplugged.py @@ -1,7 +1,4 @@ -import time - from .common import InfoExtractor -from ..networking import HEADRequest from ..utils import int_or_none @@ -31,9 +28,6 @@ def _real_extract(self, url): video_id = mobj.group('id') display_id = mobj.group('display_id') or video_id - # request setClientTimezone.php to get PHPSESSID cookie which is need to get valid json data in the next request - self._request_webpage(HEADRequest( - 'http://www.cultureunplugged.com/setClientTimezone.php?timeOffset=%d' % -(time.timezone / 3600)), display_id) movie_data = self._download_json( f'http://www.cultureunplugged.com/movie-data/cu-{video_id}.json', display_id) diff --git a/yt_dlp/extractor/dacast.py b/yt_dlp/extractor/dacast.py index 4e81aa4a7bca..537352e5f78b 100644 --- a/yt_dlp/extractor/dacast.py +++ b/yt_dlp/extractor/dacast.py @@ -1,3 +1,4 @@ +import functools import hashlib import re import time @@ -51,6 +52,15 @@ class DacastVODIE(DacastBaseIE): 'thumbnail': 'https://universe-files.dacast.com/26137208-5858-65c1-5e9a-9d6b6bd2b6c2', }, 'params': {'skip_download': 'm3u8'}, + }, { # /uspaes/ in hls_url + 'url': 'https://iframe.dacast.com/vod/f9823fc6-faba-b98f-0d00-4a7b50a58c5b/348c5c84-b6af-4859-bb9d-1d01009c795b', + 'info_dict': { + 'id': '348c5c84-b6af-4859-bb9d-1d01009c795b', + 'ext': 'mp4', + 'title': 'pl1-edyta-rubas-211124.mp4', + 'uploader_id': 'f9823fc6-faba-b98f-0d00-4a7b50a58c5b', + 'thumbnail': 'https://universe-files.dacast.com/4d0bd042-a536-752d-fc34-ad2fa44bbcbb.png', + }, }] _WEBPAGE_TESTS = [{ 'url': 'https://www.dacast.com/support/knowledgebase/how-can-i-embed-a-video-on-my-website/', @@ -74,6 
+84,15 @@ class DacastVODIE(DacastBaseIE): 'params': {'skip_download': 'm3u8'}, }] + @functools.cached_property + def _usp_signing_secret(self): + player_js = self._download_webpage( + 'https://player.dacast.com/js/player.js', None, 'Downloading player JS') + # Rotates every so often, but hardcode a fallback in case of JS change/breakage before rotation + return self._search_regex( + r'\bUSP_SIGNING_SECRET\s*=\s*(["\'])(?P(?:(?!\1).)+)', player_js, + 'usp signing secret', group='secret', fatal=False) or 'odnInCGqhvtyRTtIiddxtuRtawYYICZP' + def _real_extract(self, url): user_id, video_id = self._match_valid_url(url).group('user_id', 'id') query = {'contentId': f'{user_id}-vod-{video_id}', 'provider': 'universe'} @@ -94,10 +113,10 @@ def _real_extract(self, url): if 'DRM_EXT' in hls_url: self.report_drm(video_id) elif '/uspaes/' in hls_url: - # From https://player.dacast.com/js/player.js + # Ref: https://player.dacast.com/js/player.js ts = int(time.time()) signature = hashlib.sha1( - f'{10413792000 - ts}{ts}YfaKtquEEpDeusCKbvYszIEZnWmBcSvw').digest().hex() + f'{10413792000 - ts}{ts}{self._usp_signing_secret}'.encode()).digest().hex() hls_aes['uri'] = f'https://keys.dacast.com/uspaes/{video_id}.key?s={signature}&ts={ts}' for retry in self.RetryManager(): diff --git a/yt_dlp/extractor/dailymotion.py b/yt_dlp/extractor/dailymotion.py index cb1453d3f508..423c11c5734d 100644 --- a/yt_dlp/extractor/dailymotion.py +++ b/yt_dlp/extractor/dailymotion.py @@ -261,6 +261,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'tags': [], 'view_count': int, 'like_count': int, + 'thumbnail': r're:https://\w+.dmcdn.net/v/WnEY61cmvMxt2Fi6d/x1080', }, }, { # https://geo.dailymotion.com/player/xf7zn.html?playlist=x7wdsj @@ -288,6 +289,25 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'description': 'À bord du « véloto », l’alternative à la voiture pour la campagne', 'tags': ['biclou', 'vélo', 'véloto', 'campagne', 'voiture', 'environnement', 'véhicules intermédiaires'], }, + }, { + # https://geo.dailymotion.com/player/xry80.html?video=x8vu47w + 'url': 'https://www.metatube.com/en/videos/546765/This-frogs-decorates-Christmas-tree/', + 'info_dict': { + 'id': 'x8vu47w', + 'ext': 'mp4', + 'like_count': int, + 'uploader': 'Metatube', + 'thumbnail': r're:https://\w+.dmcdn.net/v/W1G_S1coGSFTfkTeR/x1080', + 'upload_date': '20240326', + 'view_count': int, + 'timestamp': 1711496732, + 'age_limit': 0, + 'uploader_id': 'x2xpy74', + 'title': 'Está lindas ranitas ponen su arbolito', + 'duration': 28, + 'description': 'Que lindura', + 'tags': [], + }, }] _GEO_BYPASS = False _COMMON_MEDIA_FIELDS = '''description @@ -302,7 +322,7 @@ def _extract_embed_urls(cls, url, webpage): yield from super()._extract_embed_urls(url, webpage) for mobj in re.finditer( r'(?s)DM\.player\([^,]+,\s*{.*?video[\'"]?\s*:\s*["\']?(?P[0-9a-zA-Z]+).+?}\s*\);', webpage): - yield from 'https://www.dailymotion.com/embed/video/' + mobj.group('id') + yield 'https://www.dailymotion.com/embed/video/' + mobj.group('id') for mobj in re.finditer( r'(?s)
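The dailymotion `_extract_embed_urls` change above fixes a subtle generator bug: `yield from` on a string yields it character by character, while `yield` emits the whole URL as a single item. A standalone sketch of the difference (not the extractor code itself):

```python
# `yield from` iterates its operand, so applying it to a str yields single characters;
# `yield` produces the concatenated URL as one item.
def buggy(video_id):
    yield from 'https://www.dailymotion.com/embed/video/' + video_id

def fixed(video_id):
    yield 'https://www.dailymotion.com/embed/video/' + video_id

print(list(buggy('x8vu47w'))[:5])  # ['h', 't', 't', 'p', 's']
print(list(fixed('x8vu47w')))      # ['https://www.dailymotion.com/embed/video/x8vu47w']
```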