Skip to content

Commit

Permalink
Merge branch 'yt-dlp:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
observeroftime01 authored May 13, 2024
2 parents 3821d7d + 85ec2a3 commit aee076a
Show file tree
Hide file tree
Showing 20 changed files with 187 additions and 777 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -1758,7 +1758,7 @@ The following extractors use this feature:
#### youtube
* `lang`: Prefer translated metadata (`title`, `description`, etc.) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for the list of supported content language codes.
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb`, `mweb_embedscreen` and `tv_embedded` (agegate bypass) with no variants. By default, `ios,android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients.
* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb`, `mweb_embedscreen`, `mediaconnect` and `tv_embedded` (agegate bypass) with no variants. By default, `ios,android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients.
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip JS player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details.
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
Expand Down
2 changes: 1 addition & 1 deletion yt_dlp/YoutubeDL.py
Original file line number Diff line number Diff line change
Expand Up @@ -3071,7 +3071,7 @@ def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
f = formats[-1]
self.report_warning(
'No subtitle format found matching "%s" for language %s, '
'using %s' % (formats_query, lang, f['ext']))
'using %s. Use --list-subs for a list of available subtitles' % (formats_query, lang, f['ext']))
subs[lang] = f
return subs

Expand Down
5 changes: 5 additions & 0 deletions yt_dlp/cookies.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,11 @@ def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, pa
if value is None:
return is_encrypted, None

# In chrome, session cookies have expires_utc set to 0
# In our cookie-store, cookies that do not expire should have expires set to None
if not expires_utc:
expires_utc = None

return is_encrypted, http.cookiejar.Cookie(
version=0, name=name, value=value, port=None, port_specified=False,
domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
Expand Down
10 changes: 0 additions & 10 deletions yt_dlp/extractor/_extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,6 @@
from .buzzfeed import BuzzFeedIE
from .byutv import BYUtvIE
from .c56 import C56IE
from .cableav import CableAVIE
from .callin import CallinIE
from .caltrans import CaltransIE
from .cam4 import CAM4IE
Expand Down Expand Up @@ -548,7 +547,6 @@
EggheadLessonIE,
)
from .eighttracks import EightTracksIE
from .einthusan import EinthusanIE
from .eitb import EitbIE
from .elementorembed import ElementorEmbedIE
from .elonet import ElonetIE
Expand Down Expand Up @@ -861,10 +859,6 @@
)
from .ixigua import IxiguaIE
from .izlesene import IzleseneIE
from .jable import (
JableIE,
JablePlaylistIE,
)
from .jamendo import (
JamendoIE,
JamendoAlbumIE,
Expand Down Expand Up @@ -1499,7 +1493,6 @@
)
from .popcorntimes import PopcorntimesIE
from .popcorntv import PopcornTVIE
from .porn91 import Porn91IE
from .pornbox import PornboxIE
from .pornflip import PornFlipIE
from .pornhub import (
Expand Down Expand Up @@ -2377,7 +2370,6 @@
)
from .xanimu import XanimuIE
from .xboxclips import XboxClipsIE
from .xfileshare import XFileShareIE
from .xhamster import (
XHamsterIE,
XHamsterEmbedIE,
Expand Down Expand Up @@ -2432,8 +2424,6 @@
YouNowMomentIE,
)
from .youporn import YouPornIE
from .yourporn import YourPornIE
from .yourupload import YourUploadIE
from .zaiko import (
ZaikoIE,
ZaikoETicketIE,
Expand Down
4 changes: 2 additions & 2 deletions yt_dlp/extractor/alura.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class AluraIE(InfoExtractor):

def _real_extract(self, url):

course, video_id = self._match_valid_url(url)
course, video_id = self._match_valid_url(url).group('course_name', 'id')
video_url = self._VIDEO_URL % (course, video_id)

video_dict = self._download_json(video_url, video_id, 'Searching for videos')
Expand All @@ -52,7 +52,7 @@ def _real_extract(self, url):

formats = []
for video_obj in video_dict:
video_url_m3u8 = video_obj.get('link')
video_url_m3u8 = video_obj.get('mp4')
video_format = self._extract_m3u8_formats(
video_url_m3u8, None, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False)
Expand Down
32 changes: 0 additions & 32 deletions yt_dlp/extractor/cableav.py

This file was deleted.

105 changes: 0 additions & 105 deletions yt_dlp/extractor/einthusan.py

This file was deleted.

20 changes: 18 additions & 2 deletions yt_dlp/extractor/europa.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,13 +94,14 @@ def get_item(type_, preference):

class EuroParlWebstreamIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://multimedia\.europarl\.europa\.eu/[^/#?]+/
(?:(?!video)[^/#?]+/[\w-]+_)(?P<id>[\w-]+)
https?://multimedia\.europarl\.europa\.eu/
(?:\w+/)?webstreaming/(?:[\w-]+_)?(?P<id>[\w-]+)
'''
_TESTS = [{
'url': 'https://multimedia.europarl.europa.eu/pl/webstreaming/plenary-session_20220914-0900-PLENARY',
'info_dict': {
'id': '62388b15-d85b-4add-99aa-ba12ccf64f0d',
'display_id': '20220914-0900-PLENARY',
'ext': 'mp4',
'title': 'Plenary session',
'release_timestamp': 1663139069,
Expand All @@ -125,6 +126,7 @@ class EuroParlWebstreamIE(InfoExtractor):
'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/committee-on-culture-and-education_20230301-1130-COMMITTEE-CULT',
'info_dict': {
'id': '7355662c-8eac-445e-4bb9-08db14b0ddd7',
'display_id': '20230301-1130-COMMITTEE-CULT',
'ext': 'mp4',
'release_date': '20230301',
'title': 'Committee on Culture and Education',
Expand All @@ -142,6 +144,19 @@ class EuroParlWebstreamIE(InfoExtractor):
'live_status': 'is_live',
},
'skip': 'Not live anymore'
}, {
'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/20240320-1345-SPECIAL-PRESSER',
'info_dict': {
'id': 'c1f11567-5b52-470a-f3e1-08dc3c216ace',
'display_id': '20240320-1345-SPECIAL-PRESSER',
'ext': 'mp4',
'release_date': '20240320',
'title': 'md5:7c6c814cac55dea5e2d87bf8d3db2234',
'release_timestamp': 1710939767,
}
}, {
'url': 'https://multimedia.europarl.europa.eu/webstreaming/briefing-for-media-on-2024-european-elections_20240429-1000-SPECIAL-OTHER',
'only_matching': True,
}]

def _real_extract(self, url):
Expand All @@ -166,6 +181,7 @@ def _real_extract(self, url):

return {
'id': json_info['id'],
'display_id': display_id,
'title': traverse_obj(webpage_nextjs, (('mediaItem', 'title'), ('title', )), get_all=False),
'formats': formats,
'subtitles': subtitles,
Expand Down
36 changes: 19 additions & 17 deletions yt_dlp/extractor/googledrive.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import re

from .common import InfoExtractor
from .youtube import YoutubeIE
from ..compat import compat_parse_qs
from ..utils import (
ExtractorError,
bug_reports_message,
determine_ext,
extract_attributes,
get_element_by_class,
Expand Down Expand Up @@ -38,6 +40,17 @@ class GoogleDriveIE(InfoExtractor):
'duration': 45,
'thumbnail': 'https://drive.google.com/thumbnail?id=0ByeS4oOUV-49Zzh4R1J6R09zazQ',
}
}, {
# has itag 50 which is not in YoutubeIE._formats (royalty Free music from 1922)
'url': 'https://drive.google.com/uc?id=1IP0o8dHcQrIHGgVyp0Ofvx2cGfLzyO1x',
'md5': '322db8d63dd19788c04050a4bba67073',
'info_dict': {
'id': '1IP0o8dHcQrIHGgVyp0Ofvx2cGfLzyO1x',
'ext': 'mp3',
'title': 'My Buddy - Henry Burr - Gus Kahn - Walter Donaldson.mp3',
'duration': 184,
'thumbnail': 'https://drive.google.com/thumbnail?id=1IP0o8dHcQrIHGgVyp0Ofvx2cGfLzyO1x',
},
}, {
# video can't be watched anonymously due to view count limit reached,
# but can be downloaded (see https://github.com/ytdl-org/youtube-dl/issues/14046)
Expand All @@ -58,22 +71,8 @@ class GoogleDriveIE(InfoExtractor):
'only_matching': True,
}]
_FORMATS_EXT = {
'5': 'flv',
'6': 'flv',
'13': '3gp',
'17': '3gp',
'18': 'mp4',
'22': 'mp4',
'34': 'flv',
'35': 'flv',
'36': '3gp',
'37': 'mp4',
'38': 'mp4',
'43': 'webm',
'44': 'webm',
'45': 'webm',
'46': 'webm',
'59': 'mp4',
**{k: v['ext'] for k, v in YoutubeIE._formats.items() if v.get('ext')},
'50': 'm4a',
}
_BASE_URL_CAPTIONS = 'https://drive.google.com/timedtext'
_CAPTIONS_ENTRY_TAG = {
Expand Down Expand Up @@ -194,10 +193,13 @@ def get_value(key):
if len(fmt_stream_split) < 2:
continue
format_id, format_url = fmt_stream_split[:2]
ext = self._FORMATS_EXT.get(format_id)
if not ext:
self.report_warning(f'Unknown format {format_id}{bug_reports_message()}')
f = {
'url': lowercase_escape(format_url),
'format_id': format_id,
'ext': self._FORMATS_EXT[format_id],
'ext': ext,
}
resolution = resolutions.get(format_id)
if resolution:
Expand Down
5 changes: 3 additions & 2 deletions yt_dlp/extractor/hytale.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import re

from .cloudflarestream import CloudflareStreamIE
from .common import InfoExtractor
from ..utils import traverse_obj
from ..utils.traversal import traverse_obj


class HytaleIE(InfoExtractor):
Expand Down Expand Up @@ -49,7 +50,7 @@ def _real_extract(self, url):
entries = [
self.url_result(
f'https://cloudflarestream.com/{video_hash}/manifest/video.mpd?parentOrigin=https%3A%2F%2Fhytale.com',
title=self._titles.get(video_hash), url_transparent=True)
CloudflareStreamIE, title=self._titles.get(video_hash), url_transparent=True)
for video_hash in re.findall(
r'<stream\s+class\s*=\s*"ql-video\s+cf-stream"\s+src\s*=\s*"([a-f0-9]{32})"',
webpage)
Expand Down
Loading

0 comments on commit aee076a

Please sign in to comment.