-
Notifications
You must be signed in to change notification settings - Fork 86
/
Copy pathproxy.php
362 lines (317 loc) · 18.3 KB
/
proxy.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
<?php
// Private web proxy script by Heiswayi Nrird (https://heiswayi.nrird.com)
// Released under MIT license
// Free Software should work like this: whatever you take for free, you must give back for free.
ob_start("ob_gzhandler");
if (!function_exists("curl_init")) die ("This proxy requires PHP's cURL extension. Please install/enable it on your server and try again.");
//Adapted from http://www.php.net/manual/en/function.getallheaders.php#99814
if (!function_exists("getallheaders")) {
function getallheaders() {
$result = array();
foreach($_SERVER as $key => $value) {
if (substr($key, 0, 5) == "HTTP_") {
$key = str_replace(" ", "-", ucwords(strtolower(str_replace("_", " ", substr($key, 5)))));
$result[$key] = $value;
} else {
$result[$key] = $value;
}
}
return $result;
}
}
define("PROXY_PREFIX", "http" . (isset($_SERVER['HTTPS']) ? "s" : "") . "://" . $_SERVER["SERVER_NAME"] . ($_SERVER["SERVER_PORT"] != 80 ? ":" . $_SERVER["SERVER_PORT"] : "") . $_SERVER["SCRIPT_NAME"] . "/");
//Makes an HTTP request via cURL, using request data that was passed directly to this script.
function makeRequest($url) {
//Tell cURL to make the request using the brower's user-agent if there is one, or a fallback user-agent otherwise.
$user_agent = $_SERVER["HTTP_USER_AGENT"];
if (empty($user_agent)) {
$user_agent = "Mozilla/5.0 (compatible; nrird.xyz/proxy)";
}
$ch = curl_init();
curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
//Proxy the browser's request headers.
$browserRequestHeaders = getallheaders();
//(...but let cURL set some of these headers on its own.)
//TODO: The unset()s below assume that browsers' request headers
//will use casing (capitalizations) that appear within them.
unset($browserRequestHeaders["Host"]);
unset($browserRequestHeaders["Content-Length"]);
//Throw away the browser's Accept-Encoding header if any;
//let cURL make the request using gzip if possible.
unset($browserRequestHeaders["Accept-Encoding"]);
curl_setopt($ch, CURLOPT_ENCODING, "");
//Transform the associative array from getallheaders() into an
//indexed array of header strings to be passed to cURL.
$curlRequestHeaders = array();
foreach ($browserRequestHeaders as $name => $value) {
$curlRequestHeaders[] = $name . ": " . $value;
}
curl_setopt($ch, CURLOPT_HTTPHEADER, $curlRequestHeaders);
//Proxy any received GET/POST/PUT data.
switch ($_SERVER["REQUEST_METHOD"]) {
case "GET":
$getData = array();
foreach ($_GET as $key => $value) {
$getData[] = urlencode($key) . "=" . urlencode($value);
}
if (count($getData) > 0) {
//Remove any GET data from the URL, and re-add what was read.
//TODO: Is the code in this "GET" case necessary?
//It reads, strips, then re-adds all GET data; this may be a no-op.
$url = substr($url, 0, strrpos($url, "?"));
$url .= "?" . implode("&", $getData);
}
break;
case "POST":
curl_setopt($ch, CURLOPT_POST, true);
//For some reason, $HTTP_RAW_POST_DATA isn't working as documented at
//http://php.net/manual/en/reserved.variables.httprawpostdata.php
//but the php://input method works. This is likely to be flaky
//across different server environments.
//More info here: http://stackoverflow.com/questions/8899239/http-raw-post-data-not-being-populated-after-upgrade-to-php-5-3
curl_setopt($ch, CURLOPT_POSTFIELDS, file_get_contents("php://input"));
break;
case "PUT":
curl_setopt($ch, CURLOPT_PUT, true);
curl_setopt($ch, CURLOPT_INFILE, fopen("php://input"));
break;
}
//Other cURL options.
curl_setopt($ch, CURLOPT_HEADER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt ($ch, CURLOPT_FAILONERROR, true);
//Set the request URL.
curl_setopt($ch, CURLOPT_URL, $url);
//Make the request.
$response = curl_exec($ch);
$responseInfo = curl_getinfo($ch);
$headerSize = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
curl_close($ch);
//Setting CURLOPT_HEADER to true above forces the response headers and body
//to be output together--separate them.
$responseHeaders = substr($response, 0, $headerSize);
$responseBody = substr($response, $headerSize);
return array("headers" => $responseHeaders, "body" => $responseBody, "responseInfo" => $responseInfo);
}
//Converts relative URLs to absolute ones, given a base URL.
//Modified version of code found at http://nashruddin.com/PHP_Script_for_Converting_Relative_to_Absolute_URL
function rel2abs($rel, $base) {
if (empty($rel)) $rel = ".";
if (parse_url($rel, PHP_URL_SCHEME) != "" || strpos($rel, "//") === 0) return $rel; //Return if already an absolute URL
if ($rel[0] == "#" || $rel[0] == "?") return $base.$rel; //Queries and anchors
extract(parse_url($base)); //Parse base URL and convert to local variables: $scheme, $host, $path
$path = isset($path) ? preg_replace('#/[^/]*$#', "", $path) : "/"; //Remove non-directory element from path
if ($rel[0] == '/') $path = ""; //Destroy path if relative url points to root
$port = isset($port) && $port != 80 ? ":" . $port : "";
$auth = "";
if (isset($user)) {
$auth = $user;
if (isset($pass)) {
$auth .= ":" . $pass;
}
$auth .= "@";
}
$abs = "$auth$host$path$port/$rel"; //Dirty absolute URL
for ($n = 1; $n > 0; $abs = preg_replace(array("#(/\.?/)#", "#/(?!\.\.)[^/]+/\.\./#"), "/", $abs, -1, $n)) {} //Replace '//' or '/./' or '/foo/../' with '/'
return $scheme . "://" . $abs; //Absolute URL is ready.
}
//Proxify contents of url() references in blocks of CSS text.
function proxifyCSS($css, $baseURL) {
return preg_replace_callback(
'/url\((.*?)\)/i',
function($matches) use ($baseURL) {
$url = $matches[1];
//Remove any surrounding single or double quotes from the URL so it can be passed to rel2abs - the quotes are optional in CSS
//Assume that if there is a leading quote then there should be a trailing quote, so just use trim() to remove them
if (strpos($url, "'") === 0) {
$url = trim($url, "'");
}
if (strpos($url, "\"") === 0) {
$url = trim($url, "\"");
}
if (stripos($url, "data:") === 0) return "url(" . $url . ")"; //The URL isn't an HTTP URL but is actual binary data. Don't proxify it.
return "url(" . PROXY_PREFIX . rel2abs($url, $baseURL) . ")";
},
$css);
}
// Create log
function recordLog($url) {
$userip = $_SERVER['REMOTE_ADDR'];
$rdate = date("d-m-Y", time());
$data = $rdate.','.$userip.','.$url.PHP_EOL;
$logfile = 'logs/'.$userip.'_log.txt';
$fp = fopen($logfile, 'a');
fwrite($fp, $data);
}
$proxy_prefix = PROXY_PREFIX;
$htmlcode = <<<ENDHTML
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8" />
<title>Upload</title>
<style>
*,::after,::before{box-sizing:border-box}body{margin:0;font-family:-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,"Helvetica Neue",Arial,"Noto Sans",sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol","Noto Color Emoji";font-size:1rem;font-weight:400;line-height:1.5;color:#212529;background-color:#fff;-webkit-text-size-adjust:100%;-webkit-tap-highlight-color:transparent}[tabindex="-1"]:focus:not(:focus-visible){outline:0!important}hr{margin:1rem 0;color:inherit;background-color:currentColor;border:0;opacity:.25}hr:not([size]){height:1px}h1,h2,h3,h4,h5,h6{margin-top:0;margin-bottom:.5rem;font-weight:500;line-height:1.2}h1{font-size:2.5rem}h2{font-size:2rem}h3{font-size:1.75rem}h4{font-size:1.5rem}h5{font-size:1.25rem}h6{font-size:1rem}p{margin-top:0;margin-bottom:1rem}abbr[data-original-title],abbr[title]{text-decoration:underline;-webkit-text-decoration:underline dotted;text-decoration:underline dotted;cursor:help;-webkit-text-decoration-skip-ink:none;text-decoration-skip-ink:none}address{margin-bottom:1rem;font-style:normal;line-height:inherit}ol,ul{padding-left:2rem}dl,ol,ul{margin-top:0;margin-bottom:1rem}ol ol,ol ul,ul ol,ul ul{margin-bottom:0}dt{font-weight:700}dd{margin-bottom:.5rem;margin-left:0}blockquote{margin:0 0 1rem}b,strong{font-weight:bolder}small{font-size:.875em}sub,sup{position:relative;font-size:.75em;line-height:0;vertical-align:baseline}sub{bottom:-.25em}sup{top:-.5em}a{color:#0d6efd;text-decoration:none}a:hover{color:#024dbc;text-decoration:underline}a:not([href]),a:not([href]):hover{color:inherit;text-decoration:none}code,kbd,pre,samp{font-family:SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",monospace;font-size:1em}pre{display:block;margin-top:0;margin-bottom:1rem;overflow:auto;font-size:.875em}pre code{font-size:inherit;color:inherit;word-break:normal}code{font-size:.875em;color:#d63384;word-wrap:break-word}a>code{color:inherit}kbd{padding:.2rem .4rem;font-size:.875em;color:#fff;background-color:#212529;border-radius:.2rem}kbd kbd{padding:0;font-size:1em;font-weight:700}figure{margin:0 0 1rem}img{vertical-align:middle}svg{overflow:hidden;vertical-align:middle}table{border-collapse:collapse}caption{padding-top:.5rem;padding-bottom:.5rem;color:#6c757d;text-align:left;caption-side:bottom}th{text-align:inherit}label{display:inline-block;margin-bottom:.5rem}button{border-radius:0}button:focus{outline:1px dotted;outline:5px auto -webkit-focus-ring-color}button,input,optgroup,select,textarea{margin:0;font-family:inherit;font-size:inherit;line-height:inherit}button,input{overflow:visible}button,select{text-transform:none}select{word-wrap:normal}[list]::-webkit-calendar-picker-indicator{display:none}[type=button],[type=reset],[type=submit],button{-webkit-appearance:button}[type=button]:not(:disabled),[type=reset]:not(:disabled),[type=submit]:not(:disabled),button:not(:disabled){cursor:pointer}::-moz-focus-inner{padding:0;border-style:none}input[type=date],input[type=datetime-local],input[type=month],input[type=time]{-webkit-appearance:textfield}textarea{overflow:auto;resize:vertical}fieldset{min-width:0;padding:0;margin:0;border:0}legend{float:left;width:100%;padding:0;margin-bottom:.5rem;font-size:1.5rem;line-height:inherit;color:inherit;white-space:normal}mark{padding:.2em;background-color:#fcf8e3}progress{vertical-align:baseline}::-webkit-datetime-edit{overflow:visible;line-height:0}[type=search]{outline-offset:-2px;-webkit-appearance:textfield}::-webkit-search-decoration{-webkit-appearance:none}::-webkit-color-swatch-wrapper{padding:0}::-webkit-file-upload-button{font:inherit;-webkit-appearance:button}output{display:inline-block}summary{display:list-item;cursor:pointer}template{display:none}main{display:block}[hidden]{display:none!important}
form {
display: flex;
flex-direction: column;
gap: 1ch;
padding: 12px;
}
input[type="submit"] {
width: fit-content;
}
</style>
</head>
<body>
<form onsubmit="window.location.href='$proxy_prefix' + document.getElementById('site').value; return false;">
<div>Enter the full URL to proxify:</div>
<input type="text" id="site" placeholder="http://www.google.com" required/>
<input type="submit" value="Proxify"/>
</form>
</body>
</html>
ENDHTML;
$url = substr($_SERVER["REQUEST_URI"], strlen($_SERVER["SCRIPT_NAME"]) + 1);
if (empty($url)) die($htmlcode);
if (strpos($url, "//") === 0) $url = "http:" . $url; //Assume that any supplied URLs starting with // are HTTP URLs.
if (!preg_match("@^.*://@", $url)) $url = "http://" . $url; //Assume that any supplied URLs without a scheme are HTTP URLs.
// recordLog($url);
$response = makeRequest($url);
$rawResponseHeaders = $response["headers"];
$responseBody = $response["body"];
$responseInfo = $response["responseInfo"];
//cURL can make multiple requests internally (while following 302 redirects), and reports
//headers for every request it makes. Only proxy the last set of received response headers,
//corresponding to the final request made by cURL for any given call to makeRequest().
$responseHeaderBlocks = array_filter(explode("\r\n\r\n", $rawResponseHeaders));
$lastHeaderBlock = end($responseHeaderBlocks);
$headerLines = explode("\r\n", $lastHeaderBlock);
foreach ($headerLines as $header) {
if (stripos($header, "Content-Length") === false && stripos($header, "Transfer-Encoding") === false) {
header($header);
}
}
$contentType = "";
if (isset($responseInfo["content_type"])) $contentType = $responseInfo["content_type"];
//This is presumably a web page, so attempt to proxify the DOM.
if (stripos($contentType, "text/html") !== false) {
//Attempt to normalize character encoding.
$responseBody = mb_convert_encoding($responseBody, "HTML-ENTITIES", mb_detect_encoding($responseBody));
//Parse the DOM.
$doc = new DomDocument();
@$doc->loadHTML($responseBody);
$xpath = new DOMXPath($doc);
//Rewrite forms so that their actions point back to the proxy.
foreach($xpath->query('//form') as $form) {
$method = $form->getAttribute("method");
$action = $form->getAttribute("action");
//If the form doesn't have an action, the action is the page itself.
//Otherwise, change an existing action to an absolute version.
$action = empty($action) ? $url : rel2abs($action, $url);
//Rewrite the form action to point back at the proxy.
$form->setAttribute("action", PROXY_PREFIX . $action);
}
//Profixy <style> tags.
foreach($xpath->query('//style') as $style) {
$style->nodeValue = proxifyCSS($style->nodeValue, $url);
}
//Proxify tags with a "style" attribute.
foreach ($xpath->query('//*[@style]') as $element) {
$element->setAttribute("style", proxifyCSS($element->getAttribute("style"), $url));
}
//Proxify any of these attributes appearing in any tag.
$proxifyAttributes = array("href", "src");
foreach($proxifyAttributes as $attrName) {
foreach($xpath->query('//*[@' . $attrName . ']') as $element) { //For every element with the given attribute...
$attrContent = $element->getAttribute($attrName);
if ($attrName == "href" && (stripos($attrContent, "javascript:") === 0 || stripos($attrContent, "mailto:") === 0)) continue;
$attrContent = rel2abs($attrContent, $url);
$attrContent = PROXY_PREFIX . $attrContent;
$element->setAttribute($attrName, $attrContent);
}
}
//Attempt to force AJAX requests to be made through the proxy by
//wrapping window.XMLHttpRequest.prototype.open in order to make
//all request URLs absolute and point back to the proxy.
//The rel2abs() JavaScript function serves the same purpose as the server-side one in this file,
//but is used in the browser to ensure all AJAX request URLs are absolute and not relative.
//Uses code from these sources:
//http://stackoverflow.com/questions/7775767/javascript-overriding-xmlhttprequest-open
//https://gist.github.com/1088850
//TODO: This is obviously only useful for browsers that use XMLHttpRequest but
//it's better than nothing.
$head = $xpath->query('//head')->item(0);
$body = $xpath->query('//body')->item(0);
$prependElem = $head != NULL ? $head : $body;
//Only bother trying to apply this hack if the DOM has a <head> or <body> element;
//insert some JavaScript at the top of whichever is available first.
//Protects against cases where the server sends a Content-Type of "text/html" when
//what's coming back is most likely not actually HTML.
//TODO: Do this check before attempting to do any sort of DOM parsing?
if ($prependElem != NULL) {
$scriptElem = $doc->createElement("script",
'(function() {
if (window.XMLHttpRequest) {
function parseURI(url) {
var m = String(url).replace(/^\s+|\s+$/g, "").match(/^([^:\/?#]+:)?(\/\/(?:[^:@]*(?::[^:@]*)?@)?(([^:\/?#]*)(?::(\d*))?))?([^?#]*)(\?[^#]*)?(#[\s\S]*)?/);
// authority = "//" + user + ":" + pass "@" + hostname + ":" port
return (m ? {
href : m[0] || "",
protocol : m[1] || "",
authority: m[2] || "",
host : m[3] || "",
hostname : m[4] || "",
port : m[5] || "",
pathname : m[6] || "",
search : m[7] || "",
hash : m[8] || ""
} : null);
}
function rel2abs(base, href) { // RFC 3986
function removeDotSegments(input) {
var output = [];
input.replace(/^(\.\.?(\/|$))+/, "")
.replace(/\/(\.(\/|$))+/g, "/")
.replace(/\/\.\.$/, "/../")
.replace(/\/?[^\/]*/g, function (p) {
if (p === "/..") {
output.pop();
} else {
output.push(p);
}
});
return output.join("").replace(/^\//, input.charAt(0) === "/" ? "/" : "");
}
href = parseURI(href || "");
base = parseURI(base || "");
return !href || !base ? null : (href.protocol || base.protocol) +
(href.protocol || href.authority ? href.authority : base.authority) +
removeDotSegments(href.protocol || href.authority || href.pathname.charAt(0) === "/" ? href.pathname : (href.pathname ? ((base.authority && !base.pathname ? "/" : "") + base.pathname.slice(0, base.pathname.lastIndexOf("/") + 1) + href.pathname) : base.pathname)) +
(href.protocol || href.authority || href.pathname ? href.search : (href.search || base.search)) +
href.hash;
}
var proxied = window.XMLHttpRequest.prototype.open;
window.XMLHttpRequest.prototype.open = function() {
if (arguments[1] !== null && arguments[1] !== undefined) {
var url = arguments[1];
url = rel2abs("' . $url . '", url);
url = "' . PROXY_PREFIX . '" + url;
arguments[1] = url;
}
return proxied.apply(this, [].slice.call(arguments));
};
}
})();'
);
$scriptElem->setAttribute("type", "text/javascript");
$prependElem->insertBefore($scriptElem, $prependElem->firstChild);
}
echo "<!-- Proxified page constructed by https://dev.nrird.com/web-proxy/ -->\n" . $doc->saveHTML();
} else if (stripos($contentType, "text/css") !== false) { //This is CSS, so proxify url() references.
echo proxifyCSS($responseBody, $url);
} else { //This isn't a web page or CSS, so serve unmodified through the proxy with the correct headers (images, JavaScript, etc.)
header("Content-Length: " . strlen($responseBody));
echo $responseBody;
}