Skip to content

Commit

Permalink
Merge branch 'yt-dlp:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
observeroftime01 authored Aug 2, 2023
2 parents 83b2500 + db97438 commit 956e5d6
Show file tree
Hide file tree
Showing 6 changed files with 173 additions and 16 deletions.
3 changes: 2 additions & 1 deletion yt_dlp/extractor/_extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1420,7 +1420,7 @@
PatreonIE,
PatreonCampaignIE
)
from .pbs import PBSIE
from .pbs import PBSIE, PBSKidsIE
from .pearvideo import PearVideoIE
from .peekvids import PeekVidsIE, PlayVidsIE
from .peertube import (
Expand Down Expand Up @@ -1709,6 +1709,7 @@
RuvIE,
RuvSpilaIE
)
from .s4c import S4CIE
from .safari import (
SafariIE,
SafariApiIE,
Expand Down
3 changes: 2 additions & 1 deletion yt_dlp/extractor/facebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -481,7 +481,8 @@ def extract_dash_manifest(video, formats):
dash_manifest = video.get('dash_manifest')
if dash_manifest:
formats.extend(self._parse_mpd_formats(
compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest))))
compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)),
mpd_url=video.get('dash_manifest_url')))

def process_formats(info):
# Downloads with browser's User-Agent are rate limited. Working around
Expand Down
6 changes: 5 additions & 1 deletion yt_dlp/extractor/fox.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@


class FOXIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[\da-fA-F]+)'
_VALID_URL = r'https?://(?:www\.)?fox(?:sports)?\.com/(?:watch|replay)/(?P<id>[\da-fA-F]+)'
_TESTS = [{
# clip
'url': 'https://www.fox.com/watch/4b765a60490325103ea69888fb2bd4e8/',
Expand Down Expand Up @@ -50,6 +50,10 @@ class FOXIE(InfoExtractor):
# sports event, geo-restricted
'url': 'https://www.fox.com/watch/b057484dade738d1f373b3e46216fa2c/',
'only_matching': True,
}, {
# fox sports replay, geo-restricted
'url': 'https://www.foxsports.com/replay/561f3e071347a24e5e877abc56b22e89',
'only_matching': True,
}]
_GEO_BYPASS = False
_HOME_PAGE_URL = 'https://www.fox.com/'
Expand Down
59 changes: 59 additions & 0 deletions yt_dlp/extractor/pbs.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
orderedSet,
strip_jsonp,
strip_or_none,
traverse_obj,
unified_strdate,
url_or_none,
US_RATINGS,
Expand Down Expand Up @@ -696,3 +697,61 @@ def extract_redirect_urls(info):
'subtitles': subtitles,
'chapters': chapters,
}


class PBSKidsIE(InfoExtractor):
    """Extractor for single-video pages of the form pbskids.org/video/<show>/<id>."""

    _VALID_URL = r'https?://(?:www\.)?pbskids\.org/video/[\w-]+/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://pbskids.org/video/molly-of-denali/3030407927',
        'md5': '1ded20a017cc6b53446238f1804ce4c7',
        'info_dict': {
            'id': '3030407927',
            'title': 'Bird in the Hand/Bye-Bye Birdie',
            'channel': 'molly-of-denali',
            'duration': 1540,
            'ext': 'mp4',
            'series': 'Molly of Denali',
            'description': 'md5:d006b2211633685d8ebc8d03b6d5611e',
            'categories': ['Episode'],
            'upload_date': '20190718',
        },
    }, {
        'url': 'https://pbskids.org/video/plum-landing/2365205059',
        'md5': '92e5d189851a64ae1d0237a965be71f5',
        'info_dict': {
            'id': '2365205059',
            'title': 'Cooper\'s Favorite Place in Nature',
            'channel': 'plum-landing',
            'duration': 67,
            'ext': 'mp4',
            'series': 'Plum Landing',
            'description': 'md5:657e5fc4356a84ead1c061eb280ff05d',
            'categories': ['Episode'],
            'upload_date': '20140302',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for the video page at *url*.

        The page is downloaded and the JSON object assigned to
        ``window._PBS_KIDS_DEEPLINK`` is parsed out of it. The HLS manifest
        location and all metadata fields are read from that object.
        """
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)
        deeplink = self._search_json(
            r'window\._PBS_KIDS_DEEPLINK\s*=', page, 'video info', video_id)

        # The manifest address lives under video_obj.URI; url_or_none drops
        # anything that does not look like a URL.
        manifest_url = traverse_obj(deeplink, ('video_obj', 'URI', {url_or_none}))
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            manifest_url, video_id, ext='mp4')

        metadata = traverse_obj(deeplink, {
            # A single video_type string is wrapped into a one-element list.
            'categories': ('video_obj', 'video_type', {str}, {lambda x: [x] if x else None}),
            'channel': ('show_slug', {str}),
            'description': ('video_obj', 'description', {str}),
            'duration': ('video_obj', 'duration', {int_or_none}),
            'series': ('video_obj', 'program_title', {str}),
            'title': ('video_obj', 'title', {str}),
            'upload_date': ('video_obj', 'air_date', {unified_strdate}),
        })

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            **metadata,
        }
56 changes: 43 additions & 13 deletions yt_dlp/extractor/picarto.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import urllib.parse

from .common import InfoExtractor
from ..utils import (
ExtractorError,
js_to_json,
str_or_none,
traverse_obj,
)


Expand Down Expand Up @@ -84,7 +87,7 @@ def _real_extract(self, url):


class PicartoVodIE(InfoExtractor):
_VALID_URL = r'https?://(?:www.)?picarto\.tv/videopopout/(?P<id>[^/?#&]+)'
_VALID_URL = r'https?://(?:www\.)?picarto\.tv/(?:videopopout|\w+/videos)/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://picarto.tv/videopopout/ArtofZod_2017.12.12.00.13.23.flv',
'md5': '3ab45ba4352c52ee841a28fb73f2d9ca',
Expand All @@ -94,6 +97,18 @@ class PicartoVodIE(InfoExtractor):
'title': 'ArtofZod_2017.12.12.00.13.23.flv',
'thumbnail': r're:^https?://.*\.jpg'
},
'skip': 'The VOD does not exist',
}, {
'url': 'https://picarto.tv/ArtofZod/videos/772650',
'md5': '00067a0889f1f6869cc512e3e79c521b',
'info_dict': {
'id': '772650',
'ext': 'mp4',
'title': 'Art of Zod - Drawing and Painting',
'thumbnail': r're:^https?://.*\.jpg',
'channel': 'ArtofZod',
'age_limit': 18,
}
}, {
'url': 'https://picarto.tv/videopopout/Plague',
'only_matching': True,
Expand All @@ -102,21 +117,36 @@ class PicartoVodIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)

webpage = self._download_webpage(url, video_id)

vod_info = self._parse_json(
self._search_regex(
r'(?s)#vod-player["\']\s*,\s*(\{.+?\})\s*\)', webpage,
'vod player'),
video_id, transform_source=js_to_json)
data = self._download_json(
'https://ptvintern.picarto.tv/ptvapi', video_id, query={
'query': f'''{{
video(id: "{video_id}") {{
id
title
adult
file_name
video_recording_image_url
channel {{
name
}}
}}
}}'''
})['data']['video']

file_name = data['file_name']
netloc = urllib.parse.urlparse(data['video_recording_image_url']).netloc

formats = self._extract_m3u8_formats(
vod_info['vod'], video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls')
f'https://{netloc}/stream/hls/{file_name}/index.m3u8', video_id, 'mp4', m3u8_id='hls')

return {
'id': video_id,
'title': video_id,
'thumbnail': vod_info.get('vodThumb'),
**traverse_obj(data, {
'id': ('id', {str_or_none}),
'title': ('title', {str}),
'thumbnail': 'video_recording_image_url',
'channel': ('channel', 'name', {str}),
'age_limit': ('adult', {lambda x: 18 if x else 0}),
}),
'formats': formats,
}
62 changes: 62 additions & 0 deletions yt_dlp/extractor/s4c.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
from .common import InfoExtractor
from ..utils import traverse_obj


class S4CIE(InfoExtractor):
    """Extractor for programme pages under s4c.cymru/clic/programme/<id>."""

    _VALID_URL = r'https?://(?:www\.)?s4c\.cymru/clic/programme/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.s4c.cymru/clic/programme/861362209',
        'info_dict': {
            'id': '861362209',
            'ext': 'mp4',
            'title': 'Y Swn',
            'description': 'md5:f7681a30e4955b250b3224aa9fe70cf0',
            'duration': 5340,
        },
    }, {
        'url': 'https://www.s4c.cymru/clic/programme/856636948',
        'info_dict': {
            'id': '856636948',
            'ext': 'mp4',
            'title': 'Am Dro',
            'duration': 2880,
            'description': 'md5:100d8686fc9a632a0cb2db52a3433ffe',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for the programme page at *url*.

        Three JSON requests are made: programme details (non-fatal, used only
        for metadata), the player configuration (provides ``filename``) and
        the streaming URLs (provides the ``hls`` manifest address).
        """
        video_id = self._match_id(url)

        # Metadata is optional: with fatal=False a failed download does not
        # abort extraction.
        programme_details = self._download_json(
            f'https://www.s4c.cymru/df/full_prog_details?lang=e&programme_id={video_id}',
            video_id, fatal=False)

        player_config = self._download_json(
            'https://player-api.s4c-cdn.co.uk/player-configuration/prod', video_id,
            query={
                'programme_id': video_id,
                'signed': '0',
                'lang': 'en',
                'mode': 'od',
                'appId': 'clic',
                'streamName': '',
            }, note='Downloading player config JSON')
        filename = player_config['filename']

        streaming_urls = self._download_json(
            'https://player-api.s4c-cdn.co.uk/streaming-urls/prod', video_id,
            query={
                'mode': 'od',
                'application': 'clic',
                'region': 'WW',
                'extra': 'false',
                'thirdParty': 'false',
                'filename': filename,
            }, note='Downloading streaming urls JSON')
        m3u8_url = streaming_urls['hls']

        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            m3u8_url, video_id, 'mp4', m3u8_id='hls')

        metadata = traverse_obj(programme_details, ('full_prog_details', 0, {
            # First available of programme_title / series_title (get_all=False).
            'title': (('programme_title', 'series_title'), {str}),
            'description': ('full_billing', {str.strip}),
            # NOTE(review): the source value is presumably in minutes, hence
            # the conversion by 60 -- confirm against the API.
            'duration': ('duration', {lambda x: int(x) * 60}),
        }), get_all=False)

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            **metadata,
        }

0 comments on commit 956e5d6

Please sign in to comment.