Skip to content

Commit

Permalink
Merge branch 'yt-dlp:master' into master
Browse files: Browse the repository at this point in the history
  • Loading branch information
observeroftime01 authored Oct 20, 2024
2 parents 025d64f + 679c682 commit 9ee2d41
Show file tree
Hide file tree
Showing 6 changed files with 50 additions and 16 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ jobs:
permissions:
contents: read
actions: write # For cleaning up cache
runs-on: macos-12
runs-on: macos-13

steps:
- uses: actions/checkout@v4
Expand Down Expand Up @@ -346,7 +346,7 @@ jobs:
macos_legacy:
needs: process
if: inputs.macos_legacy
runs-on: macos-12
runs-on: macos-13

steps:
- uses: actions/checkout@v4
Expand Down
7 changes: 6 additions & 1 deletion yt_dlp/extractor/adobepass.py
Original file line number Diff line number Diff line change
Expand Up @@ -1355,6 +1355,7 @@
class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
_SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s'
_USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'
_MODERN_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; rv:131.0) Gecko/20100101 Firefox/131.0'
_MVPD_CACHE = 'ap-mvpd'

_DOWNLOADING_LOGIN_PAGE = 'Downloading Provider Login Page'
Expand Down Expand Up @@ -1454,7 +1455,11 @@ def extract_redirect_url(html, url=None, fatal=False):
'no_iframe': 'false',
'domain_name': 'adobe.com',
'redirect_url': url,
})
}, headers={
# yt-dlp's default user-agent is usually too old for Comcast_SSO
# See: https://github.com/yt-dlp/yt-dlp/issues/10848
'User-Agent': self._MODERN_USER_AGENT,
} if mso_id == 'Comcast_SSO' else None)
elif not self._cookies_passed:
raise_mvpd_required()

Expand Down
31 changes: 29 additions & 2 deletions yt_dlp/extractor/substack.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,13 @@
import urllib.parse

from .common import InfoExtractor
from ..utils import js_to_json, str_or_none, traverse_obj
from ..networking import HEADRequest
from ..utils import (
determine_ext,
js_to_json,
str_or_none,
)
from ..utils.traversal import traverse_obj


class SubstackIE(InfoExtractor):
Expand Down Expand Up @@ -43,6 +49,19 @@ class SubstackIE(InfoExtractor):
'uploader': "Andrew Zimmern's Spilled Milk ",
'uploader_id': '577659',
},
}, {
# Podcast that needs its file extension resolved to mp3
'url': 'https://persuasion1.substack.com/p/summers',
'md5': '1456a755d46084744facdfac9edf900f',
'info_dict': {
'id': '141970405',
'ext': 'mp3',
'title': 'Larry Summers on What Went Wrong on Campus',
'description': 'Yascha Mounk and Larry Summers also discuss the promise and perils of artificial intelligence.',
'thumbnail': r're:https://substackcdn\.com/image/.+\.jpeg',
'uploader': 'Persuasion',
'uploader_id': '61579',
},
}]

@classmethod
Expand Down Expand Up @@ -89,7 +108,15 @@ def _real_extract(self, url):
post_type = webpage_info['post']['type']
formats, subtitles = [], {}
if post_type == 'podcast':
formats, subtitles = [{'url': webpage_info['post']['podcast_url']}], {}
fmt = {'url': webpage_info['post']['podcast_url']}
if not determine_ext(fmt['url'], default_ext=None):
# The redirected format URL expires but the original URL doesn't,
# so we only want to extract the extension from this request
fmt['ext'] = determine_ext(self._request_webpage(
HEADRequest(fmt['url']), display_id,
'Resolving podcast file extension',
'Podcast URL is invalid').url)
formats.append(fmt)
elif post_type == 'video':
formats, subtitles = self._extract_video_formats(webpage_info['post']['videoUpload']['id'], canonical_url)
else:
Expand Down
5 changes: 1 addition & 4 deletions yt_dlp/extractor/twitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -934,14 +934,13 @@ class TwitterIE(TwitterBaseIE):
'uploader_id': 'MoniqueCamarra',
'live_status': 'was_live',
'release_timestamp': 1658417414,
'description': 'md5:acce559345fd49f129c20dbcda3f1201',
'description': r're:Twitter Space participated by Sergej Sumlenny.+',
'timestamp': 1658407771,
'release_date': '20220721',
'upload_date': '20220721',
},
'add_ie': ['TwitterSpaces'],
'params': {'skip_download': 'm3u8'},
'skip': 'Requires authentication',
}, {
# URL specifies video number but --yes-playlist
'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
Expand Down Expand Up @@ -1856,8 +1855,6 @@ def _build_graphql_query(self, space_id):

def _real_extract(self, url):
space_id = self._match_id(url)
if not self.is_logged_in:
self.raise_login_required('Twitter Spaces require authentication')
space_data = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace']
if not space_data:
raise ExtractorError('Twitter Space not found', expected=True)
Expand Down
3 changes: 2 additions & 1 deletion yt_dlp/extractor/youtube.py
Original file line number Diff line number Diff line change
Expand Up @@ -4701,11 +4701,12 @@ def process_language(container, base_url, lang_code, sub_name, query):
headers=self.generate_api_headers(ytcfg=master_ytcfg),
note='Downloading initial data API JSON')

COMMENTS_SECTION_IDS = ('comment-item-section', 'engagement-panel-comments-section')
info['comment_count'] = traverse_obj(initial_data, (
'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount',
), (
'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',
'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] in COMMENTS_SECTION_IDS,
'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo',
), expected_type=self._get_count, get_all=False)

Expand Down
16 changes: 10 additions & 6 deletions yt_dlp/utils/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -824,14 +824,18 @@ class Popen(subprocess.Popen):
_startupinfo = None

@staticmethod
def _fix_pyinstaller_ld_path(env):
"""Restore LD_LIBRARY_PATH when using PyInstaller
Ref: https://github.com/pyinstaller/pyinstaller/blob/develop/doc/runtime-information.rst#ld_library_path--libpath-considerations
https://github.com/yt-dlp/yt-dlp/issues/4573
"""
def _fix_pyinstaller_issues(env):
if not hasattr(sys, '_MEIPASS'):
return

# Force spawning independent subprocesses for exes bundled with PyInstaller>=6.10
# Ref: https://pyinstaller.org/en/v6.10.0/CHANGES.html#incompatible-changes
# https://github.com/yt-dlp/yt-dlp/issues/11259
env['PYINSTALLER_RESET_ENVIRONMENT'] = '1'

# Restore LD_LIBRARY_PATH when using PyInstaller
# Ref: https://pyinstaller.org/en/v6.10.0/runtime-information.html#ld-library-path-libpath-considerations
# https://github.com/yt-dlp/yt-dlp/issues/4573
def _fix(key):
orig = env.get(f'{key}_ORIG')
if orig is None:
Expand All @@ -845,7 +849,7 @@ def _fix(key):
def __init__(self, args, *remaining, env=None, text=False, shell=False, **kwargs):
if env is None:
env = os.environ.copy()
self._fix_pyinstaller_ld_path(env)
self._fix_pyinstaller_issues(env)

self.__text_mode = kwargs.get('encoding') or kwargs.get('errors') or text or kwargs.get('universal_newlines')
if text is True:
Expand Down

0 comments on commit 9ee2d41

Please sign in to comment.