diff --git a/.gitattributes b/.gitattributes index 35af101..d8664ed 100644 --- a/.gitattributes +++ b/.gitattributes @@ -3,3 +3,4 @@ /Src/Sunra/PhpSimple/simplehtmldom_1_5/example export-ignore /Src/Sunra/PhpSimple/simplehtmldom_1_5/manual export-ignore /Src/Sunra/PhpSimple/simplehtmldom_1_5/testcase export-ignore +/Src/Sunra/PhpSimple/simplehtmldom_1_5/change_log.txt export-ignore diff --git a/README.md b/README.md index 1fb2302..945a683 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ php-simple-html-dom-parser ========================== -Version 1.5 +Version 1.5.2 Adaptation for Composer and PSR-0 of: @@ -20,7 +20,7 @@ Install composer.phar ```json "require": { - "sunra/php-simple-html-dom-parser": "v1.5.0" + "sunra/php-simple-html-dom-parser": "1.5.2" } ``` diff --git a/Src/Sunra/PhpSimple/simplehtmldom_1_5/simple_html_dom.php b/Src/Sunra/PhpSimple/simplehtmldom_1_5/simple_html_dom.php index aa4357f..279d589 100644 --- a/Src/Sunra/PhpSimple/simplehtmldom_1_5/simple_html_dom.php +++ b/Src/Sunra/PhpSimple/simplehtmldom_1_5/simple_html_dom.php @@ -73,7 +73,7 @@ // ----------------------------------------------------------------------------- // get html dom from file // $maxlen is defined in the code as PHP_STREAM_COPY_ALL which is defined as -1. -function file_get_html($url, $use_include_path = false, $context=null, $offset = -1, $maxLen=-1, $lowercase = true, $forceTagsClosed=true, $target_charset = DEFAULT_TARGET_CHARSET, $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT) +function file_get_html($url, $use_include_path = false, $context=null, $offset=0, $maxLen=-1, $lowercase = true, $forceTagsClosed=true, $target_charset = DEFAULT_TARGET_CHARSET, $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT) { // We DO force the tags to be terminated. $dom = new simple_html_dom(null, $lowercase, $forceTagsClosed, $target_charset, $stripRN, $defaultBRText, $defaultSpanText); @@ -131,7 +131,7 @@ class simple_html_dom_node public $tag_start = 0; private $dom = null; - function __construct($dom) + function __construct(simple_html_dom $dom) { $this->dom = $dom; $dom->nodes[] = $this; @@ -504,8 +504,14 @@ function makeup() return $ret . $this->_[HDOM_INFO_ENDSPACE] . '>'; } - // find elements by css selector - //PaperG - added ability for find to lowercase the value of the selector. + /** + * find elements by css selector + * PaperG - added ability for find to lowercase the value of the selector. + * @param string $selector + * @param int|null $idx + * @param bool $lowercase + * @return simple_html_dom_node[]|simple_html_dom_node|null + */ function find($selector, $idx=null, $lowercase=false) { $selectors = $this->parse_selector($selector); @@ -946,8 +952,8 @@ function get_display_size() } // camel naming conventions - function getAllAttributes() {return $this->attr;} - function getAttribute($name) {return $this->__get($name);} + function getAllAttributes() {return array_map('html_entity_decode', $this->attr);} + function getAttribute($name) {return html_entity_decode($this->__get($name));} function setAttribute($name, $value) {$this->__set($name, $value);} function hasAttribute($name) {return $this->__isset($name);} function removeAttribute($name) {$this->__set($name, null);} @@ -977,6 +983,7 @@ function appendChild($node) {$node->parent($this); return $node;} */ class simple_html_dom { + /** @var simple_html_dom_node $root */ public $root = null; public $nodes = array(); public $callback = null; @@ -1238,7 +1245,7 @@ protected function parse_charset() if (empty($charset)) { // Have php try to detect the encoding from the text given to us. - $charset = mb_detect_encoding($this->root->plaintext . "ascii", $encoding_list = array( "UTF-8", "CP1252" ) ); + $charset = (function_exists('mb_detect_encoding')) ? mb_detect_encoding($this->root->plaintext . "ascii", $encoding_list = array( "UTF-8", "CP1252" ) ) : false; if (is_object($debugObject)) {$debugObject->debugLog(2, 'mb_detect found: ' . $charset);} // and if this doesn't work... then we need to just wrongheadedly assume it's UTF-8 so that we can move on - cause this will usually give us most of what we need... @@ -1724,5 +1731,3 @@ function getElementByTagName($name) {return $this->find($name, 0);} function getElementsByTagName($name, $idx=-1) {return $this->find($name, $idx);} function loadFile() {$args = func_get_args();$this->load_file($args);} } - -?> diff --git a/composer.json b/composer.json index 6d21ae1..03473d2 100644 --- a/composer.json +++ b/composer.json @@ -1,24 +1,32 @@ { - "name": "sunra/php-simple-html-dom-parser", + "name": "eddieace/php-simple-html-dom-parser", "type": "library", - "description": "Composer adaptation of: A HTML DOM parser written in PHP5+ let you manipulate HTML in a very easy way! Require PHP 5+. Supports invalid HTML. Find tags on an HTML page with selectors just like jQuery. Extract contents from HTML in a single line.", + "description": "Composer adaptation of: A HTML DOM parser written in PHP5+ let you manipulate HTML in a very easy way! Require PHP 7+. Supports invalid HTML. Find tags on an HTML page with selectors just like jQuery. Extract contents from HTML in a single line.", "keywords": ["html", "dom", "parser"], - "homepage": "https://github.com/sunra/php-simple-html-dom-parser", + "homepage": "https://github.com/eddieace/php-simple-html-dom-parser", "license": "MIT", "authors": [ - { + { + "name": "S.C. Chen", "homepage": "http://sourceforge.net/projects/simplehtmldom/" }, { "name": "Sunra", "email": "sunra@yandex.ru", "homepage": "https://github.com/sunra" + }, + { + "name": "Edvard", + "email": "edvard@4film.se", + "homepage": "https://github.com/eddieace" } ], + "minimum-stability": "stable", "require": { - "php": ">=5.3.2" + "php": ">=7.1.0", + "ext-mbstring": "*" }, "autoload": { "psr-0": { "Sunra\\PhpSimple\\HtmlDomParser": "Src/" } } -} \ No newline at end of file +}