-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathurl-extractor.vala
251 lines (218 loc) · 9.32 KB
/
url-extractor.vala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
/*
* Totem Arte Plugin allows you to watch streams from arte.tv
* Copyright (C) 2010, 2011, 2012 Simon Wenner <[email protected]>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*
* The Totem Arte Plugin project hereby grants permission for non-GPL compatible
* GStreamer plugins to be used and distributed together with GStreamer, Totem
* and Totem Arte Plugin. This permission is above and beyond the permissions
* granted by the GPL license by which Totem Arte Plugin is covered.
* If you modify this code, you may extend this exception to your version of the
* code, but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version.
*
*/
using GLib;
using Soup;
using Json;
/**
 * Errors reported while resolving a stream or image URL from an Arte page.
 */
public errordomain ExtractionError {
    // A page was requested but no response body came back.
    DOWNLOAD_FAILED,
    // The expected URL or JSON data could not be found in a response.
    EXTRACTION_FAILED,
    // Not raised by the code in this file; presumably reserved for streams
    // that are not published yet -- confirm against the callers.
    STREAM_NOT_READY,
    // Not raised by the code in this file; presumably reserved for streams
    // the viewer may not access -- confirm against the callers.
    ACCESS_RESTRICTED
}
/**
 * Contract shared by every extractor that turns a page URL into another URL.
 */
public interface UrlExtractor : GLib.Object {
    /**
     * Resolves the URL this extractor is responsible for.
     *
     * @param q requested video quality
     * @param lang requested language
     * @param page_url address of the page to inspect
     * @return the extracted URL
     * @throws ExtractionError when the URL cannot be determined
     */
    public abstract string get_url (VideoQuality q, Language lang, string page_url)
        throws ExtractionError;
}
/**
 * Base class for extractors that download a page and pull a single string
 * out of it with a regular expression.
 */
public class IndirectUrlExtractor : GLib.Object
{
    /* HTTP session used for every request issued by this extractor. */
    protected Soup.Session session;

    public IndirectUrlExtractor ()
    {
    }

    /* Initialisation lives in the construct block so that it also runs when
       the object is created through g_object_new () or from a subclass. */
    construct {
        session = create_session ();
    }

    /**
     * Downloads //url// and returns the first capture group of //regexp//.
     *
     * @param url address of the page to download
     * @param regexp pattern whose first capture group is the wanted string
     * @return the captured text, or null when the pattern does not match
     * @throws ExtractionError DOWNLOAD_FAILED when the page cannot be
     *         fetched, EXTRACTION_FAILED when //regexp// does not compile
     */
    protected string extract_string_from_page (string url, string regexp)
        throws ExtractionError
    {
        /* Download */
        var msg = new Soup.Message ("GET", url);
        if (msg == null) {
            // libsoup hands back null when the URL cannot be parsed.
            throw new ExtractionError.DOWNLOAD_FAILED ("Video URL Extraction Error");
        }

        uint status = this.session.send_message (msg);
        // Anything outside 2xx is a transport failure or an HTTP error page;
        // neither should be scanned as if it were the requested document.
        if (status < 200 || status >= 300 || msg.response_body.data == null) {
            throw new ExtractionError.DOWNLOAD_FAILED ("Video URL Extraction Error");
        }

        /* Extract */
        // MatchInfo keeps a pointer into the searched text instead of copying
        // it, so the flattened buffer has to stay alive until fetch () is done.
        var page_buffer = msg.response_body.flatten ();
        unowned string page = (string) page_buffer.data;

        string res = null;
        try {
            MatchInfo match;
            var regex = new Regex (regexp);
            if (regex.match (page, 0, out match)) {
                res = match.fetch (1);
            }
        } catch (RegexError e) {
            GLib.warning ("%s", e.message);
            throw new ExtractionError.EXTRACTION_FAILED (e.message);
        }
        return res;
    }
}
/**
 * Resolves the playable stream URI of an Arte video page.
 *
 * The player configuration JSON of the page is downloaded, the stream variant
 * closest to the requested quality is picked, and either its plain HTTP URL
 * or an RTMP URI (with flash player verification options appended) is returned.
 */
public class RTMPStreamUrlExtractor : IndirectUrlExtractor, UrlExtractor
{
    // These properties act as a rudimentary cache
    private string? last_page_url = null;
    private string? json_uri = null;
    private Json.Object? streams_object = null;
    private string? player_uri = null;

    /* Returns the object stored under member_name, or null when the member
       is missing or does not hold an object. */
    private static Json.Object? lookup_object (Json.Object parent, string member_name)
    {
        if (!parent.has_member (member_name)) {
            return null;
        }
        unowned Json.Node node = parent.get_member (member_name);
        if (node.get_node_type () != Json.NodeType.OBJECT) {
            return null;
        }
        return node.get_object ();
    }

    /* Returns the string stored under member_name, or null when the member
       is missing or does not hold a string. */
    private static string? lookup_string (Json.Object parent, string member_name)
    {
        if (!parent.has_member (member_name)) {
            return null;
        }
        unowned Json.Node node = parent.get_member (member_name);
        if (node.get_node_type () != Json.NodeType.VALUE) {
            return null;
        }
        return node.get_string ();
    }

    /**
     * Downloads the player configuration of //page_url// and returns its
     * "VSR" object, which holds the available stream variants.
     *
     * On success the address of the configuration is remembered in json_uri.
     *
     * @param lang requested language (currently unused)
     * @param page_url address of the video page
     * @return the "VSR" member of the "videoJsonPlayer" object
     * @throws ExtractionError when the video ID, the configuration or the
     *         stream list cannot be obtained
     */
    private Json.Object get_url_json (Language lang, string page_url)
        throws ExtractionError
    {
        /* Extract the video ID directly from page_url */
        /* Example: http://www.arte.tv/guide/fr/063676-003-A/yourope -> fr/063676-003-A */
        string? video_id = null;
        try {
            MatchInfo match;
            var regex = new Regex (".*/(../.*)/");
            if (regex.match (page_url, 0, out match)) {
                video_id = match.fetch (1);
            }
        } catch (RegexError e) {
            GLib.warning ("%s", e.message);
            throw new ExtractionError.EXTRACTION_FAILED ("Unable to extract the video ID");
        }
        if (video_id == null) {
            // Without this guard a null ID would silently truncate the API
            // address built below.
            throw new ExtractionError.EXTRACTION_FAILED ("Unable to extract the video ID");
        }
        debug ("Video ID:\t'%s'", video_id);

        // Kept in a local until the download has been validated, so that a
        // failure does not overwrite the cached json_uri of the previous page.
        string? config_uri;

        /* Special case for Arte Journal which, as of 2016-02-22,
           still requires the old URL extraction method. */
        if (video_id.has_suffix ("/AJT")) {
            config_uri = extract_string_from_page (page_url,
                "arte_vp_url=['\"](https?://.*.json)['\"].*>");
            if (config_uri == null) {
                throw new ExtractionError.EXTRACTION_FAILED ("Video URL Extraction Error");
            }
            debug ("Extracted JSON URI:\t'%s'", config_uri);
        } else {
            config_uri = "https://api.arte.tv/api/player/v1/config/" + video_id + "?platform=ARTEPLUS7";
            debug ("Constructed JSON URI:\t'%s'", config_uri);
        }

        /* download and parse the main JSON file */
        var message = new Soup.Message ("GET", config_uri);
        if (message == null) {
            // libsoup hands back null when the URI cannot be parsed.
            throw new ExtractionError.EXTRACTION_FAILED ("Video URL Extraction Error");
        }
        this.session.send_message (message);
        if (message.response_body.data == null) {
            throw new ExtractionError.DOWNLOAD_FAILED ("Video URL Extraction Error");
        }

        // TODO detect if a video is only available after 23:00
        var body = message.response_body.flatten ();
        var parser = new Json.Parser ();
        try {
            parser.load_from_data ((string) body.data, -1);
        } catch (Error e) {
            throw new ExtractionError.EXTRACTION_FAILED ("Video URL Extraction Error");
        }

        // Walk the document step by step: any missing level means the answer
        // is not a usable player configuration.
        unowned Json.Node? root = parser.get_root ();
        if (root == null || root.get_node_type () != Json.NodeType.OBJECT) {
            throw new ExtractionError.EXTRACTION_FAILED ("Video URL Extraction Error");
        }
        Json.Object? player_object = lookup_object (root.get_object (), "videoJsonPlayer");
        if (player_object == null) {
            throw new ExtractionError.EXTRACTION_FAILED ("Video URL Extraction Error");
        }
        Json.Object? streams = lookup_object (player_object, "VSR");
        if (streams == null) {
            throw new ExtractionError.EXTRACTION_FAILED ("Video URL Extraction Error");
        }

        json_uri = config_uri;
        return streams;
    }

    /**
     * Returns the stream URI of the video shown on //page_url//.
     *
     * The stream list of the most recently requested page is cached. Stream
     * variants are tried starting at the requested quality, preferring the
     * HTTP MP4 variant over the RTMP one at each quality level.
     *
     * @param q requested video quality
     * @param lang requested language
     * @param page_url address of the video page
     * @return an HTTP URL, or an RTMP URI followed by swfVfy/swfUrl options
     * @throws ExtractionError EXTRACTION_FAILED when no stream can be resolved
     */
    public string get_url (VideoQuality q, Language lang, string page_url)
        throws ExtractionError
    {
        debug ("Initial Page URL:\t\t'%s'", page_url);

        string? uri = null;
        Json.Object? video_object = null;
        var is_rtmp = false;

        try {
            if (last_page_url != page_url) {
                // last_page_url is only updated after a successful download,
                // so a failed attempt is retried on the next call.
                streams_object = get_url_json (lang, page_url);
                last_page_url = page_url;
                player_uri = null;
            }
            if (streams_object == null) {
                throw new ExtractionError.EXTRACTION_FAILED ("Video URL Extraction Error");
            }

            // sorted from the best to worst
            // LQ still exist for the Arte Journal
            string[] quality_options = {"SQ_1", "EQ_1", "HQ_1", "MQ_1", "LQ_1"};
            int quality_index = 2;
            switch (q) {
            case VideoQuality.LOW:
                quality_index = 3;
                break;
            case VideoQuality.HIGH:
                quality_index = 1;
                break;
            case VideoQuality.HD:
                quality_index = 0;
                break;
            default: // MEDIUM is the default
                quality_index = 2;
                break;
            }

            // try all options starting from the desired one
            for (int i = 0; i < quality_options.length; i++) {
                unowned string quality = quality_options[(quality_index + i) % quality_options.length];

                video_object = lookup_object (streams_object, "HTTP_MP4_" + quality);
                if (video_object != null) {
                    break;
                }

                video_object = lookup_object (streams_object, "RTMP_" + quality);
                if (video_object != null) {
                    is_rtmp = true;
                    break;
                }
            }

            if (video_object == null) {
                throw new ExtractionError.EXTRACTION_FAILED ("Video URL Extraction Error");
            }

            string? url = lookup_string (video_object, "url");
            if (url == null) {
                throw new ExtractionError.EXTRACTION_FAILED ("Video URL Extraction Error");
            }

            if (!is_rtmp) {
                uri = url;
                debug ("Extracted video uri:\t'%s'", uri);
                return uri;
            }

            string? streamer = lookup_string (video_object, "streamer");
            if (streamer == null) {
                throw new ExtractionError.EXTRACTION_FAILED ("Video URL Extraction Error");
            }
            debug ("Streamer base:\t'%s'", streamer);
            debug ("Streamer path:\t'%s'", url);
            uri = streamer + url;
        } catch (ExtractionError e) {
            // Callers always see the same code and message for this stage;
            // the detailed cause is only logged.
            debug ("Stream lookup failed: %s", e.message);
            throw new ExtractionError.EXTRACTION_FAILED ("Video URL Extraction Error");
        }

        if (player_uri == null) {
            // Try to figure out the player URI
            try {
                var regexp = "content=\"(http.*.swf)\\?";
                var embeded_uri = "http://www.arte.tv/player/v2/index.php?json_url=" + json_uri + "&config=arte_tvguide";
                player_uri = extract_string_from_page (embeded_uri, regexp);
                debug ("Extract player URI:\t'%s'", player_uri);
                if (player_uri == null) {
                    throw new ExtractionError.EXTRACTION_FAILED ("Player URL Extraction Error");
                }
            } catch (ExtractionError e) {
                // Do not abort and try to play the video with a known old player URI.
                // The server does not seem to always check the player validity, so it may work anyway.
                debug ("Failed to extract the flash player URI! Trying to fallback...");
                player_uri = "http://www.arte.tv/playerv2/jwplayer5/mediaplayer.5.7.1894.swf";
            }
        }

        string stream_uri = uri + " swfVfy=1 swfUrl=" + player_uri;
        debug ("Build stream URI:\t\t'%s'", stream_uri);
        return stream_uri;
    }
}
/**
 * Extracts the preview image URL (the og:image meta tag) of a video page.
 */
public class ImageUrlExtractor : IndirectUrlExtractor, UrlExtractor
{
    /**
     * Takes a video page URL and returns the URL of its preview image.
     *
     * @param q unused
     * @param lang unused
     * @param page_url address of the video page
     * @return the content of the page's og:image meta tag
     * @throws ExtractionError when the page cannot be downloaded or contains
     *         no matching og:image meta tag
     */
    public string get_url (VideoQuality q, Language lang, string page_url)
        throws ExtractionError
    {
        // Example: <meta content='http://www.arte.tv/papi/tvguide/images/1286686/W940H530/051448-000-A_venezianische_04-1415022308224.jpg' property='og:image'>
        //
        // The capture is restricted to characters other than quotes so that
        // it cannot run across the closing quote into neighbouring meta tags
        // when several of them share one line. The dot in front of "jpg" is
        // left unescaped on purpose to keep accepting the same endings as before.
        string regexp = "<meta content=['\"](https?://[^'\"]*.jpg)['\"] property=['\"]og:image['\"]>";

        string image_url = extract_string_from_page (page_url, regexp);
        if (image_url == null) {
            throw new ExtractionError.EXTRACTION_FAILED ("Image URL Extraction Error");
        }
        return image_url;
    }
}