From 0a31ccef1ce91f3010efc559d3fc1efc02171928 Mon Sep 17 00:00:00 2001 From: Scrub <72096833+ScrubN@users.noreply.github.com> Date: Wed, 19 Jun 2024 20:18:17 -0400 Subject: [PATCH] Move M3U8 parsing logic into separate file (#1104) --- TwitchDownloaderCore/Tools/M3U8.cs | 609 ++---------------------- TwitchDownloaderCore/Tools/M3U8Parse.cs | 585 +++++++++++++++++++++++ 2 files changed, 612 insertions(+), 582 deletions(-) create mode 100644 TwitchDownloaderCore/Tools/M3U8Parse.cs diff --git a/TwitchDownloaderCore/Tools/M3U8.cs b/TwitchDownloaderCore/Tools/M3U8.cs index 15a35831..47a444cd 100644 --- a/TwitchDownloaderCore/Tools/M3U8.cs +++ b/TwitchDownloaderCore/Tools/M3U8.cs @@ -1,9 +1,7 @@ using System; using System.Collections.Generic; using System.Globalization; -using System.IO; using System.Text; -using System.Text.RegularExpressions; using TwitchDownloaderCore.Extensions; namespace TwitchDownloaderCore.Tools @@ -11,7 +9,7 @@ namespace TwitchDownloaderCore.Tools // https://en.wikipedia.org/wiki/M3U // https://datatracker.ietf.org/doc/html/rfc8216 // ReSharper disable StringLiteralTypo - public sealed record M3U8(M3U8.Metadata FileMetadata, M3U8.Stream[] Streams) + public partial record M3U8(M3U8.Metadata FileMetadata, M3U8.Stream[] Streams) { public override string ToString() { @@ -34,167 +32,7 @@ public override string ToString() return sb.ToString(); } - public static M3U8 Parse(System.IO.Stream stream, Encoding streamEncoding, string basePath = "") - { - var sr = new StreamReader(stream, streamEncoding); - if (!ParsingHelpers.TryParseM3UHeader(sr.ReadLine(), out _)) - { - throw new FormatException("Invalid playlist, M3U header is missing."); - } - - var streams = new List(); - - Stream.ExtMediaInfo currentExtMediaInfo = null; - Stream.ExtStreamInfo currentExtStreamInfo = null; - - Metadata.Builder metadataBuilder = new(); - DateTimeOffset currentExtProgramDateTime = default; - Stream.ExtByteRange currentByteRange = default; - Stream.ExtPartInfo 
currentExtPartInfo = null; - - while (sr.ReadLine() is { } line) - { - if (string.IsNullOrWhiteSpace(line)) - { - ClearStreamMetadata(out currentExtMediaInfo, out currentExtStreamInfo, out currentExtProgramDateTime, out currentByteRange, out currentExtPartInfo); - continue; - } - - if (line[0] != '#') - { - var path = Path.Combine(basePath, line); - streams.Add(new Stream(currentExtMediaInfo, currentExtStreamInfo, currentExtPartInfo, currentExtProgramDateTime, currentByteRange, path)); - ClearStreamMetadata(out currentExtMediaInfo, out currentExtStreamInfo, out currentExtProgramDateTime, out currentByteRange, out currentExtPartInfo); - - continue; - } - - if (!ParseM3U8Key(line, metadataBuilder, ref currentExtMediaInfo, ref currentExtStreamInfo, ref currentExtProgramDateTime, ref currentByteRange, ref currentExtPartInfo)) - { - break; - } - } - - return new M3U8(metadataBuilder.ToMetadata(), streams.ToArray()); - } - - public static M3U8 Parse(ReadOnlySpan text, string basePath = "") - { - if (!ParsingHelpers.TryParseM3UHeader(text, out text)) - { - throw new FormatException("Invalid playlist, M3U header is missing."); - } - - var streams = new List(); - - Stream.ExtMediaInfo currentExtMediaInfo = null; - Stream.ExtStreamInfo currentExtStreamInfo = null; - - Metadata.Builder metadataBuilder = new(); - DateTimeOffset currentExtProgramDateTime = default; - Stream.ExtByteRange currentByteRange = default; - Stream.ExtPartInfo currentExtPartInfo = null; - - var textStart = -1; - var textEnd = text.Length; - var lineEnd = -1; - var iterations = 0; - var maxIterations = text.Count('\n') + 1; - do - { - textStart++; - iterations++; - if (iterations > maxIterations) - throw new Exception("Infinite loop encountered while decoding M3U8 playlist."); - - if (textStart >= textEnd) - break; - - var workingSlice = text[textStart..]; - lineEnd = workingSlice.IndexOf('\n'); - if (lineEnd != -1) - workingSlice = workingSlice[..lineEnd].TrimEnd('\r'); - - if 
(workingSlice.IsWhiteSpace()) - { - ClearStreamMetadata(out currentExtMediaInfo, out currentExtStreamInfo, out currentExtProgramDateTime, out currentByteRange, out currentExtPartInfo); - continue; - } - - if (workingSlice[0] != '#') - { - var path = Path.Combine(basePath, workingSlice.ToString()); - streams.Add(new Stream(currentExtMediaInfo, currentExtStreamInfo, currentExtPartInfo, currentExtProgramDateTime, currentByteRange, path)); - ClearStreamMetadata(out currentExtMediaInfo, out currentExtStreamInfo, out currentExtProgramDateTime, out currentByteRange, out currentExtPartInfo); - - if (lineEnd == -1) - break; - - continue; - } - - if (!ParseM3U8Key(workingSlice, metadataBuilder, ref currentExtMediaInfo, ref currentExtStreamInfo, ref currentExtProgramDateTime, ref currentByteRange, ref currentExtPartInfo)) - { - break; - } - - if (lineEnd == -1) - { - break; - } - } while ((textStart += lineEnd) < textEnd); - - return new M3U8(metadataBuilder.ToMetadata(), streams.ToArray()); - } - - private static void ClearStreamMetadata(out Stream.ExtMediaInfo currentExtMediaInfo, out Stream.ExtStreamInfo currentExtStreamInfo, out DateTimeOffset currentExtProgramDateTime, - out Stream.ExtByteRange currentByteRange, out Stream.ExtPartInfo currentExtPartInfo) - { - currentExtMediaInfo = null; - currentExtStreamInfo = null; - currentExtProgramDateTime = default; - currentByteRange = default; - currentExtPartInfo = null; - } - - private static bool ParseM3U8Key(ReadOnlySpan text, Metadata.Builder metadataBuilder, ref Stream.ExtMediaInfo extMediaInfo, ref Stream.ExtStreamInfo extStreamInfo, - ref DateTimeOffset extProgramDateTime, ref Stream.ExtByteRange byteRange, ref Stream.ExtPartInfo extPartInfo) - { - const string PROGRAM_DATE_TIME_KEY = "#EXT-X-PROGRAM-DATE-TIME:"; - const string END_LIST_KEY = "#EXT-X-ENDLIST"; - if (text.StartsWith(Stream.ExtMediaInfo.MEDIA_INFO_KEY)) - { - extMediaInfo = Stream.ExtMediaInfo.Parse(text); - } - else if 
(text.StartsWith(Stream.ExtStreamInfo.STREAM_INFO_KEY)) - { - extStreamInfo = Stream.ExtStreamInfo.Parse(text); - } - else if (text.StartsWith(PROGRAM_DATE_TIME_KEY)) - { - extProgramDateTime = ParsingHelpers.ParseDateTimeOffset(text, PROGRAM_DATE_TIME_KEY, false); - } - else if (text.StartsWith(Stream.ExtByteRange.BYTE_RANGE_KEY)) - { - byteRange = Stream.ExtByteRange.Parse(text); - } - else if (text.StartsWith(Stream.ExtPartInfo.PART_INFO_KEY)) - { - extPartInfo = Stream.ExtPartInfo.Parse(text); - } - else if (text.StartsWith(END_LIST_KEY)) - { - return false; - } - else - { - metadataBuilder.ParseAndAppend(text); - } - - return true; - } - - public sealed record Metadata + public partial record Metadata { public enum PlaylistType { @@ -216,14 +54,14 @@ public enum PlaylistType private const string TWITCH_INFO_KEY = "#EXT-X-TWITCH-INFO:"; // Generic M3U headers - public uint Version { get; private set; } - public uint StreamTargetDuration { get; private set; } - public PlaylistType Type { get; private set; } = PlaylistType.Unknown; - public uint MediaSequence { get; private set; } + public uint Version { get; internal set; } + public uint StreamTargetDuration { get; internal set; } + public PlaylistType Type { get; internal set; } = PlaylistType.Unknown; + public uint MediaSequence { get; internal set; } // Twitch specific - public uint TwitchLiveSequence { get; private set; } - public decimal TwitchElapsedSeconds { get; private set; } + public uint TwitchLiveSequence { get; internal set; } + public decimal TwitchElapsedSeconds { get; internal set; } public decimal TwitchTotalSeconds { get; internal set; } // Other headers that we don't have dedicated properties for. Useful for debugging. 
@@ -263,95 +101,9 @@ public override string ToString() return sb.TrimEnd(itemSeparator).ToString(); } - - public sealed class Builder - { - private Metadata _metadata; - - public Builder ParseAndAppend(ReadOnlySpan text) - { - text = text.Trim(); - - if (!text.IsEmpty) - { - ParseAndAppendCore(text); - } - - return this; - } - - private void ParseAndAppendCore(ReadOnlySpan text) - { - if (text.StartsWith(TARGET_VERSION_KEY)) - { - _metadata ??= new Metadata(); - _metadata.Version = ParsingHelpers.ParseUIntValue(text, TARGET_VERSION_KEY); - } - else if (text.StartsWith(TARGET_DURATION_KEY)) - { - _metadata ??= new Metadata(); - _metadata.StreamTargetDuration = ParsingHelpers.ParseUIntValue(text, TARGET_DURATION_KEY); - } - else if (text.StartsWith(PLAYLIST_TYPE_KEY)) - { - _metadata ??= new Metadata(); - var temp = text[PLAYLIST_TYPE_KEY.Length..]; - if (temp.StartsWith(PLAYLIST_TYPE_VOD)) - _metadata.Type = PlaylistType.Vod; - else if (temp.StartsWith(PLAYLIST_TYPE_EVENT)) - _metadata.Type = PlaylistType.Event; - else - throw new FormatException($"Unable to parse PlaylistType from: {text}"); - } - else if (text.StartsWith(MEDIA_SEQUENCE_KEY)) - { - _metadata ??= new Metadata(); - _metadata.MediaSequence = ParsingHelpers.ParseUIntValue(text, MEDIA_SEQUENCE_KEY); - } - else if (text.StartsWith(TWITCH_LIVE_SEQUENCE_KEY)) - { - _metadata ??= new Metadata(); - _metadata.TwitchLiveSequence = ParsingHelpers.ParseUIntValue(text, TWITCH_LIVE_SEQUENCE_KEY); - } - else if (text.StartsWith(TWITCH_ELAPSED_SECS_KEY)) - { - _metadata ??= new Metadata(); - _metadata.TwitchElapsedSeconds = ParsingHelpers.ParseDecimalValue(text, TWITCH_ELAPSED_SECS_KEY); - } - else if (text.StartsWith(TWITCH_TOTAL_SECS_KEY)) - { - _metadata ??= new Metadata(); - _metadata.TwitchTotalSeconds = ParsingHelpers.ParseDecimalValue(text, TWITCH_TOTAL_SECS_KEY); - } - else if (text.StartsWith(TWITCH_INFO_KEY)) - { - // Do nothing. This header includes response related info that we don't need. 
- } - else if (text[0] == '#') - { - _metadata ??= new Metadata(); - var colonIndex = text.IndexOf(':'); - if (colonIndex != -1) - { - var kvp = new KeyValuePair(text[..(colonIndex + 1)].ToString(), text[(colonIndex + 1)..].ToString()); - _metadata._unparsedValues.Add(kvp); - } - else - { - var kvp = new KeyValuePair("", text.ToString()); - _metadata._unparsedValues.Add(kvp); - } - } - } - - public Metadata ToMetadata() - { - return _metadata; - } - } } - public sealed record Stream(Stream.ExtMediaInfo MediaInfo, Stream.ExtStreamInfo StreamInfo, Stream.ExtPartInfo PartInfo, DateTimeOffset ProgramDateTime, Stream.ExtByteRange ByteRange, string Path) + public partial record Stream(Stream.ExtMediaInfo MediaInfo, Stream.ExtStreamInfo StreamInfo, Stream.ExtPartInfo PartInfo, DateTimeOffset ProgramDateTime, Stream.ExtByteRange ByteRange, string Path) { public Stream(ExtMediaInfo mediaInfo, ExtStreamInfo streamInfo, string path) : this(mediaInfo, streamInfo, null, default, default, path) { } @@ -390,34 +142,16 @@ public override string ToString() return sb.ToString(); } - public readonly record struct ExtByteRange(uint Start, uint Length) + public readonly partial record struct ExtByteRange(uint Start, uint Length) { internal const string BYTE_RANGE_KEY = "#EXT-X-BYTERANGE:"; public override string ToString() => $"{BYTE_RANGE_KEY}{Start}@{Length}"; - public static ExtByteRange Parse(ReadOnlySpan text) - { - if (text.StartsWith(BYTE_RANGE_KEY)) - text = text[17..]; - - var separatorIndex = text.IndexOf('@'); - if (separatorIndex == -1) - throw new FormatException($"Unable to parse ByteRange from {text}."); - - if (!uint.TryParse(text[..separatorIndex], NumberStyles.Integer, CultureInfo.InvariantCulture, out var start)) - throw new FormatException($"Unable to parse ByteRange from {text}."); - - if (!uint.TryParse(text[(separatorIndex + 1)..], NumberStyles.Integer, CultureInfo.InvariantCulture, out var end)) - throw new FormatException($"Unable to parse ByteRange from 
{text}."); - - return new ExtByteRange(start, end); - } - public static implicit operator ExtByteRange((uint start, uint length) tuple) => new(tuple.start, tuple.length); } - public sealed class ExtMediaInfo + public partial record ExtMediaInfo { public enum MediaType { @@ -442,11 +176,11 @@ public ExtMediaInfo(MediaType type, string groupId, string name, bool autoSelect Default = @default; } - public MediaType Type { get; private set; } = MediaType.Unknown; - public string GroupId { get; private set; } - public string Name { get; private set; } - public bool AutoSelect { get; private set; } - public bool Default { get; private set; } + public MediaType Type { get; internal set; } = MediaType.Unknown; + public string GroupId { get; internal set; } + public string Name { get; internal set; } + public bool AutoSelect { get; internal set; } + public bool Default { get; internal set; } public override string ToString() { @@ -477,85 +211,14 @@ static string BooleanToWord(bool b) return b ? "YES" : "NO"; } } - - public static ExtMediaInfo Parse(ReadOnlySpan text) - { - var mediaInfo = new ExtMediaInfo(); - - if (text.StartsWith(MEDIA_INFO_KEY)) - text = text[13..]; - - const string KEY_TYPE = "TYPE="; - const string KEY_GROUP_ID = "GROUP-ID=\""; - const string KEY_NAME = "NAME=\""; - const string KEY_AUTOSELECT = "AUTOSELECT="; - const string KEY_DEFAULT = "DEFAULT="; - do - { - text = text.TrimStart(); - - if (text.StartsWith(KEY_TYPE)) - { - var temp = text[KEY_TYPE.Length..]; - if (temp.StartsWith(MEDIA_TYPE_VIDEO)) - mediaInfo.Type = MediaType.Video; - else if (temp.StartsWith(MEDIA_TYPE_AUDIO)) - mediaInfo.Type = MediaType.Audio; - else - throw new FormatException($"Unable to parse MediaType from: {text}"); - } - else if (text.StartsWith(KEY_GROUP_ID)) - { - mediaInfo.GroupId = ParsingHelpers.ParseStringValue(text, KEY_GROUP_ID); - } - else if (text.StartsWith(KEY_NAME)) - { - mediaInfo.Name = ParsingHelpers.ParseStringValue(text, KEY_NAME); - } - else if 
(text.StartsWith(KEY_AUTOSELECT)) - { - mediaInfo.AutoSelect = ParsingHelpers.ParseBooleanValue(text, KEY_AUTOSELECT); - } - else if (text.StartsWith(KEY_DEFAULT)) - { - mediaInfo.Default = ParsingHelpers.ParseBooleanValue(text, KEY_DEFAULT); - } - - var nextIndex = text.UnEscapedIndexOf(','); - if (nextIndex == -1) - break; - - text = text[(nextIndex + 1)..]; - } while (true); - - return mediaInfo; - } } - public sealed record ExtStreamInfo + public partial record ExtStreamInfo { - public readonly record struct StreamResolution(uint Width, uint Height) + public readonly partial record struct StreamResolution(uint Width, uint Height) { public override string ToString() => $"{Width}x{Height}"; - public static StreamResolution Parse(ReadOnlySpan text) - { - if (text.StartsWith("RESOLUTION=")) - text = text[11..]; - - var separatorIndex = text.IndexOfAny("x"); - if (separatorIndex == -1 || separatorIndex == text.Length) - throw new FormatException($"Unable to parse Resolution from {text}."); - - if (!uint.TryParse(text[..separatorIndex], NumberStyles.Integer, CultureInfo.InvariantCulture, out var width)) - throw new FormatException($"Unable to parse Resolution from {text}."); - - if (!uint.TryParse(text[(separatorIndex + 1)..], NumberStyles.Integer, CultureInfo.InvariantCulture, out var height)) - throw new FormatException($"Unable to parse Resolution from {text}."); - - return new StreamResolution(width, height); - } - public static implicit operator StreamResolution((uint width, uint height) tuple) => new(tuple.width, tuple.height); } @@ -573,12 +236,12 @@ public ExtStreamInfo(int programId, int bandwidth, string codecs, StreamResoluti Framerate = framerate; } - public int ProgramId { get; private set; } - public int Bandwidth { get; private set; } - public string Codecs { get; private set; } - public StreamResolution Resolution { get; private set; } - public string Video { get; private set; } - public decimal Framerate { get; private set; } + public int ProgramId { 
get; internal set; } + public int Bandwidth { get; internal set; } + public string Codecs { get; internal set; } + public StreamResolution Resolution { get; internal set; } + public string Video { get; internal set; } + public decimal Framerate { get; internal set; } public override string ToString() { @@ -594,68 +257,9 @@ public override string ToString() return sb.ToString(); } - - public static ExtStreamInfo Parse(ReadOnlySpan text) - { - var streamInfo = new ExtStreamInfo(); - - if (text.StartsWith(STREAM_INFO_KEY)) - text = text[18..]; - - const string KEY_PROGRAM_ID = "PROGRAM-ID="; - const string KEY_BANDWIDTH = "BANDWIDTH="; - const string KEY_CODECS = "CODECS=\""; - const string KEY_RESOLUTION = "RESOLUTION="; - const string KEY_VIDEO = "VIDEO=\""; - const string KEY_FRAMERATE = "FRAME-RATE="; - do - { - text = text.TrimStart(); - - if (text.StartsWith(KEY_PROGRAM_ID)) - { - streamInfo.ProgramId = ParsingHelpers.ParseIntValue(text, KEY_PROGRAM_ID, false); - } - else if (text.StartsWith(KEY_BANDWIDTH)) - { - streamInfo.Bandwidth = ParsingHelpers.ParseIntValue(text, KEY_BANDWIDTH, false); - } - else if (text.StartsWith(KEY_CODECS)) - { - streamInfo.Codecs = ParsingHelpers.ParseStringValue(text, KEY_CODECS); - } - else if (text.StartsWith(KEY_RESOLUTION)) - { - streamInfo.Resolution = ParsingHelpers.ParseResolution(text, KEY_RESOLUTION); - } - else if (text.StartsWith(KEY_VIDEO)) - { - streamInfo.Video = ParsingHelpers.ParseStringValue(text, KEY_VIDEO); - } - else if (text.StartsWith(KEY_FRAMERATE)) - { - streamInfo.Framerate = ParsingHelpers.ParseDecimalValue(text, KEY_FRAMERATE, false); - } - - var nextIndex = text.UnEscapedIndexOf(','); - if (nextIndex == -1) - break; - - text = text[(nextIndex + 1)..]; - } while (true); - - // Sometimes Twitch's M3U8 response lacks a Framerate value, among other things. We can just guess the framerate using the Video value. 
- if (streamInfo.Framerate == 0 && Regex.IsMatch(streamInfo.Video, @"p\d+$", RegexOptions.RightToLeft)) - { - var index = streamInfo.Video.LastIndexOf('p'); - streamInfo.Framerate = int.Parse(streamInfo.Video.AsSpan(index + 1)); - } - - return streamInfo; - } } - public sealed record ExtPartInfo + public partial record ExtPartInfo { internal const string PART_INFO_KEY = "#EXTINF:"; @@ -667,8 +271,8 @@ public ExtPartInfo(decimal duration, bool live) Live = live; } - public decimal Duration { get; private set; } - public bool Live { get; private set; } + public decimal Duration { get; internal set; } + public bool Live { get; internal set; } public override string ToString() { @@ -686,165 +290,6 @@ public override string ToString() return sb.ToString(); } - - public static ExtPartInfo Parse(ReadOnlySpan text) - { - var partInfo = new ExtPartInfo(); - - if (text.StartsWith(PART_INFO_KEY)) - text = text[8..]; - - do - { - text = text.TrimStart(); - - if (!text.IsEmpty && char.IsDigit(text[0])) - { - partInfo.Duration = ParsingHelpers.ParseDecimalValue(text, ""); - } - else if (text.StartsWith("live")) - { - partInfo.Live = true; - } - - var nextIndex = text.UnEscapedIndexOf(','); - if (nextIndex == -1) - break; - - text = text[(nextIndex + 1)..]; - } while (true); - - return partInfo; - } - } - } - - private static class ParsingHelpers - { - public static bool TryParseM3UHeader(ReadOnlySpan text, out ReadOnlySpan textWithoutHeader) - { - const string M3U_HEADER = "#EXTM3U"; - if (!text.StartsWith(M3U_HEADER)) - { - textWithoutHeader = default; - return false; - } - - textWithoutHeader = text[7..].TrimStart(" \r\n"); - return true; - } - - public static string ParseStringValue(ReadOnlySpan text, ReadOnlySpan keyName) - { - var temp = text[keyName.Length..]; - - if (temp.Contains("\\\"", StringComparison.Ordinal)) - { - throw new NotSupportedException("Escaped quotes are not supported. 
Please report this as a bug: https://github.com/lay295/TwitchDownloader/issues/new/choose"); - } - - var closeQuote = temp.IndexOf('"'); - if (closeQuote == -1) - { - throw new FormatException("Expected close quote was not found."); - } - - return temp[..closeQuote].ToString(); - } - - public static int ParseIntValue(ReadOnlySpan text, ReadOnlySpan keyName, bool strict = true) - { - var temp = text[keyName.Length..]; - temp = temp[..NextKeyStart(temp)]; - - if (int.TryParse(temp, NumberStyles.Integer, CultureInfo.InvariantCulture, out var intValue)) - return intValue; - - if (!strict) - return default; - - throw new FormatException($"Unable to parse integer from: {text}"); - } - - public static uint ParseUIntValue(ReadOnlySpan text, ReadOnlySpan keyName, bool strict = true) - { - var temp = text[keyName.Length..]; - temp = temp[..NextKeyStart(temp)]; - - if (uint.TryParse(temp, NumberStyles.Integer, CultureInfo.InvariantCulture, out var uIntValue)) - return uIntValue; - - if (!strict) - return default; - - throw new FormatException($"Unable to parse integer from: {text}"); - } - - public static decimal ParseDecimalValue(ReadOnlySpan text, ReadOnlySpan keyName, bool strict = true) - { - var temp = text[keyName.Length..]; - temp = temp[..NextKeyStart(temp)]; - - if (decimal.TryParse(temp, NumberStyles.Number, CultureInfo.InvariantCulture, out var decimalValue)) - return decimalValue; - - if (!strict) - return default; - - throw new FormatException($"Unable to parse decimal from: {text}"); - } - - public static bool ParseBooleanValue(ReadOnlySpan text, ReadOnlySpan keyName, bool strict = true) - { - var temp = text[keyName.Length..]; - - if (temp.StartsWith("NO")) - return false; - - if (temp.StartsWith("YES")) - return true; - - temp = temp[..NextKeyStart(temp)]; - - if (bool.TryParse(temp, out var booleanValue)) - return booleanValue; - - if (!strict) - return default; - - throw new FormatException($"Unable to parse boolean from: {text}"); - } - - public static 
Stream.ExtStreamInfo.StreamResolution ParseResolution(ReadOnlySpan text, ReadOnlySpan keyName) - { - var temp = text[keyName.Length..]; - temp = temp[..NextKeyStart(temp)]; - - return Stream.ExtStreamInfo.StreamResolution.Parse(temp); - } - - public static DateTimeOffset ParseDateTimeOffset(ReadOnlySpan text, ReadOnlySpan keyName, bool strict = true) - { - var temp = text[keyName.Length..]; - temp = temp[..NextKeyStart(temp)]; - - if (DateTimeOffset.TryParse(temp, null, DateTimeStyles.AssumeUniversal, out var dateTimeOffset)) - return dateTimeOffset; - - if (!strict) - return default; - - throw new FormatException($"Unable to parse DateTimeOffset from: {text}"); - } - - private static Index NextKeyStart(ReadOnlySpan text) - { - var nextKey = text.UnEscapedIndexOfAny(",\r\n"); - return nextKey switch - { - -1 => text.Length, // This is probably the last value - _ => nextKey - }; } } diff --git a/TwitchDownloaderCore/Tools/M3U8Parse.cs b/TwitchDownloaderCore/Tools/M3U8Parse.cs new file mode 100644 index 00000000..dfa45e0a --- /dev/null +++ b/TwitchDownloaderCore/Tools/M3U8Parse.cs @@ -0,0 +1,585 @@ +using System; +using System.Collections.Generic; +using System.Globalization; +using System.IO; +using System.Text; +using System.Text.RegularExpressions; +using TwitchDownloaderCore.Extensions; + +namespace TwitchDownloaderCore.Tools +{ + public partial record M3U8 + { + public static M3U8 Parse(System.IO.Stream stream, Encoding streamEncoding, string basePath = "") + { + var sr = new StreamReader(stream, streamEncoding); + if (!ParsingHelpers.TryParseM3UHeader(sr.ReadLine(), out _)) + { + throw new FormatException("Invalid playlist, M3U header is missing."); + } + + var streams = new List(); + + Stream.ExtMediaInfo currentExtMediaInfo = null; + Stream.ExtStreamInfo currentExtStreamInfo = null; + + Metadata.Builder metadataBuilder = new(); + DateTimeOffset currentExtProgramDateTime = default; + Stream.ExtByteRange currentByteRange = default; + Stream.ExtPartInfo 
currentExtPartInfo = null; + + while (sr.ReadLine() is { } line) + { + if (string.IsNullOrWhiteSpace(line)) + { + ClearStreamMetadata(out currentExtMediaInfo, out currentExtStreamInfo, out currentExtProgramDateTime, out currentByteRange, out currentExtPartInfo); + continue; + } + + if (line[0] != '#') + { + var path = Path.Combine(basePath, line); + streams.Add(new Stream(currentExtMediaInfo, currentExtStreamInfo, currentExtPartInfo, currentExtProgramDateTime, currentByteRange, path)); + ClearStreamMetadata(out currentExtMediaInfo, out currentExtStreamInfo, out currentExtProgramDateTime, out currentByteRange, out currentExtPartInfo); + + continue; + } + + if (!ParseM3U8Key(line, metadataBuilder, ref currentExtMediaInfo, ref currentExtStreamInfo, ref currentExtProgramDateTime, ref currentByteRange, ref currentExtPartInfo)) + { + break; + } + } + + return new M3U8(metadataBuilder.ToMetadata(), streams.ToArray()); + } + + public static M3U8 Parse(ReadOnlySpan text, string basePath = "") + { + if (!ParsingHelpers.TryParseM3UHeader(text, out text)) + { + throw new FormatException("Invalid playlist, M3U header is missing."); + } + + var streams = new List(); + + Stream.ExtMediaInfo currentExtMediaInfo = null; + Stream.ExtStreamInfo currentExtStreamInfo = null; + + Metadata.Builder metadataBuilder = new(); + DateTimeOffset currentExtProgramDateTime = default; + Stream.ExtByteRange currentByteRange = default; + Stream.ExtPartInfo currentExtPartInfo = null; + + var textStart = -1; + var textEnd = text.Length; + var lineEnd = -1; + var iterations = 0; + var maxIterations = text.Count('\n') + 1; + do + { + textStart++; + iterations++; + if (iterations > maxIterations) + throw new Exception("Infinite loop encountered while decoding M3U8 playlist."); + + if (textStart >= textEnd) + break; + + var workingSlice = text[textStart..]; + lineEnd = workingSlice.IndexOf('\n'); + if (lineEnd != -1) + workingSlice = workingSlice[..lineEnd].TrimEnd('\r'); + + if 
(workingSlice.IsWhiteSpace()) + { + ClearStreamMetadata(out currentExtMediaInfo, out currentExtStreamInfo, out currentExtProgramDateTime, out currentByteRange, out currentExtPartInfo); + continue; + } + + if (workingSlice[0] != '#') + { + var path = Path.Combine(basePath, workingSlice.ToString()); + streams.Add(new Stream(currentExtMediaInfo, currentExtStreamInfo, currentExtPartInfo, currentExtProgramDateTime, currentByteRange, path)); + ClearStreamMetadata(out currentExtMediaInfo, out currentExtStreamInfo, out currentExtProgramDateTime, out currentByteRange, out currentExtPartInfo); + + if (lineEnd == -1) + break; + + continue; + } + + if (!ParseM3U8Key(workingSlice, metadataBuilder, ref currentExtMediaInfo, ref currentExtStreamInfo, ref currentExtProgramDateTime, ref currentByteRange, ref currentExtPartInfo)) + { + break; + } + + if (lineEnd == -1) + { + break; + } + } while ((textStart += lineEnd) < textEnd); + + return new M3U8(metadataBuilder.ToMetadata(), streams.ToArray()); + } + + private static void ClearStreamMetadata(out Stream.ExtMediaInfo currentExtMediaInfo, out Stream.ExtStreamInfo currentExtStreamInfo, out DateTimeOffset currentExtProgramDateTime, + out Stream.ExtByteRange currentByteRange, out Stream.ExtPartInfo currentExtPartInfo) + { + currentExtMediaInfo = null; + currentExtStreamInfo = null; + currentExtProgramDateTime = default; + currentByteRange = default; + currentExtPartInfo = null; + } + + private static bool ParseM3U8Key(ReadOnlySpan text, Metadata.Builder metadataBuilder, ref Stream.ExtMediaInfo extMediaInfo, ref Stream.ExtStreamInfo extStreamInfo, + ref DateTimeOffset extProgramDateTime, ref Stream.ExtByteRange byteRange, ref Stream.ExtPartInfo extPartInfo) + { + const string PROGRAM_DATE_TIME_KEY = "#EXT-X-PROGRAM-DATE-TIME:"; + const string END_LIST_KEY = "#EXT-X-ENDLIST"; + if (text.StartsWith(Stream.ExtMediaInfo.MEDIA_INFO_KEY)) + { + extMediaInfo = Stream.ExtMediaInfo.Parse(text); + } + else if 
(text.StartsWith(Stream.ExtStreamInfo.STREAM_INFO_KEY)) + { + extStreamInfo = Stream.ExtStreamInfo.Parse(text); + } + else if (text.StartsWith(PROGRAM_DATE_TIME_KEY)) + { + extProgramDateTime = ParsingHelpers.ParseDateTimeOffset(text, PROGRAM_DATE_TIME_KEY, false); + } + else if (text.StartsWith(Stream.ExtByteRange.BYTE_RANGE_KEY)) + { + byteRange = Stream.ExtByteRange.Parse(text); + } + else if (text.StartsWith(Stream.ExtPartInfo.PART_INFO_KEY)) + { + extPartInfo = Stream.ExtPartInfo.Parse(text); + } + else if (text.StartsWith(END_LIST_KEY)) + { + return false; + } + else + { + metadataBuilder.ParseAndAppend(text); + } + + return true; + } + + public partial record Metadata + { + public sealed class Builder + { + private Metadata _metadata; + + public Builder ParseAndAppend(ReadOnlySpan text) + { + text = text.Trim(); + + if (!text.IsEmpty) + { + ParseAndAppendCore(text); + } + + return this; + } + + private void ParseAndAppendCore(ReadOnlySpan text) + { + if (text.StartsWith(TARGET_VERSION_KEY)) + { + _metadata ??= new Metadata(); + _metadata.Version = ParsingHelpers.ParseUIntValue(text, TARGET_VERSION_KEY); + } + else if (text.StartsWith(TARGET_DURATION_KEY)) + { + _metadata ??= new Metadata(); + _metadata.StreamTargetDuration = ParsingHelpers.ParseUIntValue(text, TARGET_DURATION_KEY); + } + else if (text.StartsWith(PLAYLIST_TYPE_KEY)) + { + _metadata ??= new Metadata(); + var temp = text[PLAYLIST_TYPE_KEY.Length..]; + if (temp.StartsWith(PLAYLIST_TYPE_VOD)) + _metadata.Type = PlaylistType.Vod; + else if (temp.StartsWith(PLAYLIST_TYPE_EVENT)) + _metadata.Type = PlaylistType.Event; + else + throw new FormatException($"Unable to parse PlaylistType from: {text}"); + } + else if (text.StartsWith(MEDIA_SEQUENCE_KEY)) + { + _metadata ??= new Metadata(); + _metadata.MediaSequence = ParsingHelpers.ParseUIntValue(text, MEDIA_SEQUENCE_KEY); + } + else if (text.StartsWith(TWITCH_LIVE_SEQUENCE_KEY)) + { + _metadata ??= new Metadata(); + _metadata.TwitchLiveSequence = 
ParsingHelpers.ParseUIntValue(text, TWITCH_LIVE_SEQUENCE_KEY);
                    }
                    else if (text.StartsWith(TWITCH_ELAPSED_SECS_KEY))
                    {
                        _metadata ??= new Metadata();
                        _metadata.TwitchElapsedSeconds = ParsingHelpers.ParseDecimalValue(text, TWITCH_ELAPSED_SECS_KEY);
                    }
                    else if (text.StartsWith(TWITCH_TOTAL_SECS_KEY))
                    {
                        _metadata ??= new Metadata();
                        _metadata.TwitchTotalSeconds = ParsingHelpers.ParseDecimalValue(text, TWITCH_TOTAL_SECS_KEY);
                    }
                    else if (text.StartsWith(TWITCH_INFO_KEY))
                    {
                        // Do nothing. This header includes response related info that we don't need.
                    }
                    else if (text[0] == '#')
                    {
                        // Unrecognized tag: store it as an unparsed key/value pair instead of dropping it.
                        // The key keeps its trailing ':'; a tag without ':' is stored with an empty key.
                        _metadata ??= new Metadata();
                        var colonIndex = text.IndexOf(':');
                        if (colonIndex != -1)
                        {
                            var kvp = new KeyValuePair<string, string>(text[..(colonIndex + 1)].ToString(), text[(colonIndex + 1)..].ToString());
                            _metadata._unparsedValues.Add(kvp);
                        }
                        else
                        {
                            var kvp = new KeyValuePair<string, string>("", text.ToString());
                            _metadata._unparsedValues.Add(kvp);
                        }
                    }
                }

                /// <summary>Returns the metadata instance accumulated by this builder.</summary>
                /// <remarks>
                /// NOTE(review): every parsing branch lazily creates the instance with <c>??=</c>, so this
                /// presumably returns null when no metadata tag was parsed. Confirm against the field declaration.
                /// </remarks>
                public Metadata ToMetadata()
                {
                    return _metadata;
                }
            }
        }

        public partial record Stream
        {
            public partial record struct ExtByteRange
            {
                /// <summary>
                /// Parses a byte range of the form <c>a@b</c>, optionally prefixed with <see cref="BYTE_RANGE_KEY"/>.
                /// </summary>
                /// <param name="text">The tag line, or only its value.</param>
                /// <returns>The byte range built from the two unsigned integers, in the order they appear.</returns>
                /// <exception cref="FormatException">The '@' separator is missing or either side is not a valid <see cref="uint"/>.</exception>
                public static ExtByteRange Parse(ReadOnlySpan<char> text)
                {
                    if (text.StartsWith(BYTE_RANGE_KEY))
                        text = text[BYTE_RANGE_KEY.Length..];

                    // An empty span on either side of '@' fails TryParse, so no extra bounds check is needed.
                    var separatorIndex = text.IndexOf('@');
                    if (separatorIndex != -1
                        && uint.TryParse(text[..separatorIndex], NumberStyles.Integer, CultureInfo.InvariantCulture, out var start)
                        && uint.TryParse(text[(separatorIndex + 1)..], NumberStyles.Integer, CultureInfo.InvariantCulture, out var end))
                    {
                        return new ExtByteRange(start, end);
                    }

                    throw new FormatException($"Unable to parse ByteRange from {text}.");
                }
            }

            public partial record ExtMediaInfo
            {
                /// <summary>
                /// Parses a comma separated media attribute list, optionally prefixed with <see cref="MEDIA_INFO_KEY"/>.
                /// </summary>
                /// <param name="text">The tag line, or only its attribute list.</param>
                /// <returns>A media info populated from the TYPE, GROUP-ID, NAME, AUTOSELECT and DEFAULT attributes; other attributes are ignored.</returns>
                /// <exception cref="FormatException">The TYPE attribute is neither video nor audio, or a value is malformed.</exception>
                public static Stream.ExtMediaInfo Parse(ReadOnlySpan<char> text)
                {
                    var mediaInfo = new Stream.ExtMediaInfo();

                    if (text.StartsWith(MEDIA_INFO_KEY))
                        text = text[MEDIA_INFO_KEY.Length..];

                    const string KEY_TYPE = "TYPE=";
                    const string KEY_GROUP_ID = "GROUP-ID=\"";
                    const string KEY_NAME = "NAME=\"";
                    const string KEY_AUTOSELECT = "AUTOSELECT=";
                    const string KEY_DEFAULT = "DEFAULT=";

                    // Consume one attribute per iteration, then skip past the next unescaped comma.
                    while (true)
                    {
                        text = text.TrimStart();

                        if (text.StartsWith(KEY_TYPE))
                        {
                            var temp = text[KEY_TYPE.Length..];
                            if (temp.StartsWith(MEDIA_TYPE_VIDEO))
                                mediaInfo.Type = MediaType.Video;
                            else if (temp.StartsWith(MEDIA_TYPE_AUDIO))
                                mediaInfo.Type = MediaType.Audio;
                            else
                                throw new FormatException($"Unable to parse MediaType from: {text}");
                        }
                        else if (text.StartsWith(KEY_GROUP_ID))
                        {
                            mediaInfo.GroupId = ParsingHelpers.ParseStringValue(text, KEY_GROUP_ID);
                        }
                        else if (text.StartsWith(KEY_NAME))
                        {
                            mediaInfo.Name = ParsingHelpers.ParseStringValue(text, KEY_NAME);
                        }
                        else if (text.StartsWith(KEY_AUTOSELECT))
                        {
                            mediaInfo.AutoSelect = ParsingHelpers.ParseBooleanValue(text, KEY_AUTOSELECT);
                        }
                        else if (text.StartsWith(KEY_DEFAULT))
                        {
                            mediaInfo.Default = ParsingHelpers.ParseBooleanValue(text, KEY_DEFAULT);
                        }

                        var nextIndex = text.UnEscapedIndexOf(',');
                        if (nextIndex == -1)
                            break;

                        text = text[(nextIndex + 1)..];
                    }

                    return mediaInfo;
                }
            }

            public partial record ExtStreamInfo
            {
                public partial record struct StreamResolution
                {
                    /// <summary>
                    /// Parses a resolution of the form <c>WIDTHxHEIGHT</c>, optionally prefixed with <c>RESOLUTION=</c>.
                    /// </summary>
                    /// <param name="text">The attribute, or only its value.</param>
                    /// <returns>The parsed width and height.</returns>
                    /// <exception cref="FormatException">The 'x' separator is missing or either side is not a valid <see cref="uint"/>.</exception>
                    public static StreamResolution Parse(ReadOnlySpan<char> text)
                    {
                        const string KEY_RESOLUTION = "RESOLUTION=";
                        if (text.StartsWith(KEY_RESOLUTION))
                            text = text[KEY_RESOLUTION.Length..];

                        // An empty span on either side of 'x' fails TryParse, so no extra bounds check is needed.
                        var separatorIndex = text.IndexOf('x');
                        if (separatorIndex != -1
                            && uint.TryParse(text[..separatorIndex], NumberStyles.Integer, CultureInfo.InvariantCulture, out var width)
                            && uint.TryParse(text[(separatorIndex + 1)..], NumberStyles.Integer, CultureInfo.InvariantCulture, out var height))
                        {
                            return new StreamResolution(width, height);
                        }

                        throw new FormatException($"Unable to parse Resolution from {text}.");
                    }
                }

                /// <summary>
                /// Parses a comma separated stream attribute list, optionally prefixed with <see cref="STREAM_INFO_KEY"/>.
                /// </summary>
                /// <param name="text">The tag line, or only its attribute list.</param>
                /// <returns>
                /// A stream info populated from the PROGRAM-ID, BANDWIDTH, CODECS, RESOLUTION, VIDEO and FRAME-RATE
                /// attributes; other attributes are ignored. Numeric attributes that fail to parse are left at their default.
                /// </returns>
                /// <exception cref="FormatException">A quoted value is unterminated or the resolution is malformed.</exception>
                public static ExtStreamInfo Parse(ReadOnlySpan<char> text)
                {
                    var streamInfo = new ExtStreamInfo();

                    if (text.StartsWith(STREAM_INFO_KEY))
                        text = text[STREAM_INFO_KEY.Length..];

                    const string KEY_PROGRAM_ID = "PROGRAM-ID=";
                    const string KEY_BANDWIDTH = "BANDWIDTH=";
                    const string KEY_CODECS = "CODECS=\"";
                    const string KEY_RESOLUTION = "RESOLUTION=";
                    const string KEY_VIDEO = "VIDEO=\"";
                    const string KEY_FRAMERATE = "FRAME-RATE=";

                    // Consume one attribute per iteration, then skip past the next unescaped comma.
                    while (true)
                    {
                        text = text.TrimStart();

                        if (text.StartsWith(KEY_PROGRAM_ID))
                        {
                            streamInfo.ProgramId = ParsingHelpers.ParseIntValue(text, KEY_PROGRAM_ID, false);
                        }
                        else if (text.StartsWith(KEY_BANDWIDTH))
                        {
                            streamInfo.Bandwidth = ParsingHelpers.ParseIntValue(text, KEY_BANDWIDTH, false);
                        }
                        else if (text.StartsWith(KEY_CODECS))
                        {
                            streamInfo.Codecs = ParsingHelpers.ParseStringValue(text, KEY_CODECS);
                        }
                        else if (text.StartsWith(KEY_RESOLUTION))
                        {
                            streamInfo.Resolution = ParsingHelpers.ParseResolution(text, KEY_RESOLUTION);
                        }
                        else if (text.StartsWith(KEY_VIDEO))
                        {
                            streamInfo.Video = ParsingHelpers.ParseStringValue(text, KEY_VIDEO);
                        }
                        else if (text.StartsWith(KEY_FRAMERATE))
                        {
                            streamInfo.Framerate = ParsingHelpers.ParseDecimalValue(text, KEY_FRAMERATE, false);
                        }

                        var nextIndex = text.UnEscapedIndexOf(',');
                        if (nextIndex == -1)
                            break;

                        text = text[(nextIndex + 1)..];
                    }

                    // Sometimes Twitch's M3U8 response lacks a Framerate value, among other things. We can just guess the framerate using the Video value.
                    // A Video value ending in 'p' followed only by digits (e.g. "720p60") yields those digits as the framerate.
                    // The null guard covers playlists without a VIDEO attribute, and TryParse avoids throwing on overflow.
                    var video = streamInfo.Video;
                    if (streamInfo.Framerate == 0 && video is not null)
                    {
                        var index = video.LastIndexOf('p');
                        if (index != -1
                            && int.TryParse(video.AsSpan(index + 1), NumberStyles.None, CultureInfo.InvariantCulture, out var guessedFramerate))
                        {
                            streamInfo.Framerate = guessedFramerate;
                        }
                    }

                    return streamInfo;
                }
            }

            public partial record ExtPartInfo
            {
                /// <summary>
                /// Parses a comma separated part info list, optionally prefixed with <see cref="PART_INFO_KEY"/>.
                /// </summary>
                /// <param name="text">The tag line, or only its value list.</param>
                /// <returns>A part info whose duration comes from the entry starting with a digit and whose live flag is set by an entry starting with <c>live</c>.</returns>
                /// <exception cref="FormatException">An entry starting with a digit is not a valid decimal.</exception>
                public static ExtPartInfo Parse(ReadOnlySpan<char> text)
                {
                    var partInfo = new ExtPartInfo();

                    if (text.StartsWith(PART_INFO_KEY))
                        text = text[PART_INFO_KEY.Length..];

                    // Consume one entry per iteration, then skip past the next unescaped comma.
                    while (true)
                    {
                        text = text.TrimStart();

                        if (!text.IsEmpty && char.IsDigit(text[0]))
                        {
                            // The duration has no key, so an empty key name is passed.
                            partInfo.Duration = ParsingHelpers.ParseDecimalValue(text, "");
                        }
                        else if (text.StartsWith("live"))
                        {
                            partInfo.Live = true;
                        }

                        var nextIndex = text.UnEscapedIndexOf(',');
                        if (nextIndex == -1)
                            break;

                        text = text[(nextIndex + 1)..];
                    }

                    return partInfo;
                }
            }
        }

        private static class ParsingHelpers
        {
            /// <summary>Checks for the <c>#EXTM3U</c> header at the start of <paramref name="text"/>.</summary>
            /// <param name="text">The playlist text or its first line.</param>
            /// <param name="textWithoutHeader">
            /// On success, <paramref name="text"/> with the header and any following spaces and line breaks removed;
            /// otherwise <c>default</c>.
            /// </param>
            /// <returns><c>true</c> if the header is present.</returns>
            public static bool TryParseM3UHeader(ReadOnlySpan<char> text, out ReadOnlySpan<char> textWithoutHeader)
            {
                const string M3U_HEADER = "#EXTM3U";
                if (!text.StartsWith(M3U_HEADER))
                {
                    textWithoutHeader = default;
                    return false;
                }

                textWithoutHeader = text[M3U_HEADER.Length..].TrimStart(" \r\n");
                return true;
            }

            /// <summary>Reads a quoted string value that follows <paramref name="keyName"/>.</summary>
            /// <param name="text">Text that starts with <paramref name="keyName"/>.</param>
            /// <param name="keyName">The key, including its opening quote.</param>
            /// <returns>The characters up to, but excluding, the closing quote.</returns>
            /// <exception cref="NotSupportedException">An escaped quote appears anywhere after the key.</exception>
            /// <exception cref="FormatException">No closing quote was found.</exception>
            public static string ParseStringValue(ReadOnlySpan<char> text, ReadOnlySpan<char> keyName)
            {
                var temp = text[keyName.Length..];

                if (temp.Contains("\\\"", StringComparison.Ordinal))
                {
                    throw new NotSupportedException("Escaped quotes are not supported. Please report this as a bug: https://github.com/lay295/TwitchDownloader/issues/new/choose");
                }

                var closeQuote = temp.IndexOf('"');
                if (closeQuote == -1)
                {
                    throw new FormatException("Expected close quote was not found.");
                }

                return temp[..closeQuote].ToString();
            }

            /// <summary>Parses the <see cref="int"/> value that follows <paramref name="keyName"/>.</summary>
            /// <param name="text">Text that starts with <paramref name="keyName"/>.</param>
            /// <param name="keyName">The key to skip.</param>
            /// <param name="strict">When <c>false</c>, an unparsable value yields <c>default</c> instead of throwing.</param>
            /// <exception cref="FormatException">The value is not a valid integer and <paramref name="strict"/> is <c>true</c>.</exception>
            public static int ParseIntValue(ReadOnlySpan<char> text, ReadOnlySpan<char> keyName, bool strict = true)
            {
                var temp = SliceValue(text, keyName);

                if (int.TryParse(temp, NumberStyles.Integer, CultureInfo.InvariantCulture, out var intValue))
                    return intValue;

                if (!strict)
                    return default;

                throw new FormatException($"Unable to parse integer from: {text}");
            }

            /// <summary>Parses the <see cref="uint"/> value that follows <paramref name="keyName"/>.</summary>
            /// <param name="text">Text that starts with <paramref name="keyName"/>.</param>
            /// <param name="keyName">The key to skip.</param>
            /// <param name="strict">When <c>false</c>, an unparsable value yields <c>default</c> instead of throwing.</param>
            /// <exception cref="FormatException">The value is not a valid unsigned integer and <paramref name="strict"/> is <c>true</c>.</exception>
            public static uint ParseUIntValue(ReadOnlySpan<char> text, ReadOnlySpan<char> keyName, bool strict = true)
            {
                var temp = SliceValue(text, keyName);

                if (uint.TryParse(temp, NumberStyles.Integer, CultureInfo.InvariantCulture, out var uIntValue))
                    return uIntValue;

                if (!strict)
                    return default;

                throw new FormatException($"Unable to parse integer from: {text}");
            }

            /// <summary>Parses the <see cref="decimal"/> value that follows <paramref name="keyName"/>.</summary>
            /// <param name="text">Text that starts with <paramref name="keyName"/>.</param>
            /// <param name="keyName">The key to skip; may be empty.</param>
            /// <param name="strict">When <c>false</c>, an unparsable value yields <c>default</c> instead of throwing.</param>
            /// <exception cref="FormatException">The value is not a valid decimal and <paramref name="strict"/> is <c>true</c>.</exception>
            public static decimal ParseDecimalValue(ReadOnlySpan<char> text, ReadOnlySpan<char> keyName, bool strict = true)
            {
                var temp = SliceValue(text, keyName);

                if (decimal.TryParse(temp, NumberStyles.Number, CultureInfo.InvariantCulture, out var decimalValue))
                    return decimalValue;

                if (!strict)
                    return default;

                throw new FormatException($"Unable to parse decimal from: {text}");
            }

            /// <summary>
            /// Parses the boolean value that follows <paramref name="keyName"/>. Values starting with <c>NO</c> or
            /// <c>YES</c> are accepted first, then anything <see cref="bool.TryParse(ReadOnlySpan{char}, out bool)"/> accepts.
            /// </summary>
            /// <param name="text">Text that starts with <paramref name="keyName"/>.</param>
            /// <param name="keyName">The key to skip.</param>
            /// <param name="strict">When <c>false</c>, an unparsable value yields <c>default</c> instead of throwing.</param>
            /// <exception cref="FormatException">The value is not a valid boolean and <paramref name="strict"/> is <c>true</c>.</exception>
            public static bool ParseBooleanValue(ReadOnlySpan<char> text, ReadOnlySpan<char> keyName, bool strict = true)
            {
                var temp = text[keyName.Length..];

                if (temp.StartsWith("NO"))
                    return false;

                if (temp.StartsWith("YES"))
                    return true;

                temp = temp[..NextKeyStart(temp)];

                if (bool.TryParse(temp, out var booleanValue))
                    return booleanValue;

                if (!strict)
                    return default;

                throw new FormatException($"Unable to parse boolean from: {text}");
            }

            /// <summary>Parses the <c>WIDTHxHEIGHT</c> value that follows <paramref name="keyName"/>.</summary>
            /// <param name="text">Text that starts with <paramref name="keyName"/>.</param>
            /// <param name="keyName">The key to skip.</param>
            /// <exception cref="FormatException">The value is not a valid resolution.</exception>
            public static Stream.ExtStreamInfo.StreamResolution ParseResolution(ReadOnlySpan<char> text, ReadOnlySpan<char> keyName)
            {
                return Stream.ExtStreamInfo.StreamResolution.Parse(SliceValue(text, keyName));
            }

            /// <summary>
            /// Parses the date/time value that follows <paramref name="keyName"/>. Values without an explicit offset are
            /// treated as UTC.
            /// </summary>
            /// <param name="text">Text that starts with <paramref name="keyName"/>.</param>
            /// <param name="keyName">The key to skip.</param>
            /// <param name="strict">When <c>false</c>, an unparsable value yields <c>default</c> instead of throwing.</param>
            /// <exception cref="FormatException">The value is not a valid date/time and <paramref name="strict"/> is <c>true</c>.</exception>
            public static DateTimeOffset ParseDateTimeOffset(ReadOnlySpan<char> text, ReadOnlySpan<char> keyName, bool strict = true)
            {
                var temp = SliceValue(text, keyName);

                // Playlist timestamps are machine-readable, so they must not depend on the user's current culture.
                if (DateTimeOffset.TryParse(temp, CultureInfo.InvariantCulture, DateTimeStyles.AssumeUniversal, out var dateTimeOffset))
                    return dateTimeOffset;

                if (!strict)
                    return default;

                throw new FormatException($"Unable to parse DateTimeOffset from: {text}");
            }

            /// <summary>Returns the value that follows <paramref name="keyName"/>, cut off where the next key or line starts.</summary>
            private static ReadOnlySpan<char> SliceValue(ReadOnlySpan<char> text, ReadOnlySpan<char> keyName)
            {
                var value = text[keyName.Length..];
                return value[..NextKeyStart(value)];
            }

            /// <summary>
            /// Returns the index of the first unescaped comma, carriage return or line feed in <paramref name="text"/>,
            /// or the length of <paramref name="text"/> when there is none.
            /// </summary>
            private static Index NextKeyStart(ReadOnlySpan<char> text)
            {
                var nextKey = text.UnEscapedIndexOfAny(",\r\n");

                // No separator means this is probably the last value.
                return nextKey == -1 ? text.Length : nextKey;
            }
        }
    }
}