From 7e18c070b357d2ca969ec1f1cc4cf51483a98226 Mon Sep 17 00:00:00 2001 From: Denis Ivanov Date: Sun, 29 Dec 2019 15:01:40 +0400 Subject: [PATCH 1/8] #18 Fix but without namespaces. --- .../AngleSharp.XPath.Tests.csproj | 1 + .../HtmlDocumentNavigatorTests.cs | 104 +++++++++------- src/AngleSharp.XPath/Extensions.cs | 85 ++++++------- src/AngleSharp.XPath/HtmlDocumentNavigable.cs | 8 +- src/AngleSharp.XPath/HtmlDocumentNavigator.cs | 113 ++++++++---------- 5 files changed, 154 insertions(+), 157 deletions(-) diff --git a/src/AngleSharp.XPath.Tests/AngleSharp.XPath.Tests.csproj b/src/AngleSharp.XPath.Tests/AngleSharp.XPath.Tests.csproj index d761a13..0634ae7 100644 --- a/src/AngleSharp.XPath.Tests/AngleSharp.XPath.Tests.csproj +++ b/src/AngleSharp.XPath.Tests/AngleSharp.XPath.Tests.csproj @@ -1,5 +1,6 @@  + diff --git a/src/AngleSharp.XPath.Tests/HtmlDocumentNavigatorTests.cs b/src/AngleSharp.XPath.Tests/HtmlDocumentNavigatorTests.cs index 9031edc..a2cf8a0 100644 --- a/src/AngleSharp.XPath.Tests/HtmlDocumentNavigatorTests.cs +++ b/src/AngleSharp.XPath.Tests/HtmlDocumentNavigatorTests.cs @@ -1,65 +1,87 @@ +using System.Xml; +using AngleSharp.Xml.Parser; +using AngleSharp.Html.Parser; +using NUnit.Framework; +using System.Threading.Tasks; + namespace AngleSharp.XPath.Tests { - using AngleSharp.Html.Parser; - using NUnit.Framework; - using System.Threading.Tasks; - [TestFixture] - public class HtmlDocumentNavigatorTests - { - [Test, Retry(5)] - public async Task SelectSingleNodeTest() - { - // Arrange - const string address = "https://stackoverflow.com/questions/39471800/is-anglesharps-htmlparser-threadsafe"; - var config = Configuration.Default.WithDefaultLoader(); - var document = await BrowsingContext.New(config).OpenAsync(address); + public class HtmlDocumentNavigatorTests + { + [Test, Retry(5)] + public async Task SelectSingleNodeTest() + { + // Arrange + const string address = "https://stackoverflow.com/questions/39471800/is-anglesharps-htmlparser-threadsafe"; + var config = Configuration.Default.WithDefaultLoader(); + var document = await BrowsingContext.New(config).OpenAsync(address); - // Act - var content = document.DocumentElement.SelectSingleNode("//div[@id='content']"); + // Act + var content = document.DocumentElement.SelectSingleNode("//div[@id='content']"); - // Assert - Assert.That(content, Is.Not.Null); - } + // Assert + Assert.That(content, Is.Not.Null); + } - [Test] - public void SelectNodes_SelectList_ShouldReturnList() - { - // Arrange - const string html = - @"
    + [Test] + public void SelectNodes_SelectList_ShouldReturnList() + { + // Arrange + const string html = + @"
    1. First
    2. Second
    3. Third
    "; - var parser = new HtmlParser(); - var document = parser.ParseDocument(html); + var parser = new HtmlParser(); + var document = parser.ParseDocument(html); + + // Act + var nodes = document.DocumentElement.SelectNodes("//li"); - // Act - var nodes = document.DocumentElement.SelectNodes("//li"); + // Assert + Assert.That(nodes, Has.Count.EqualTo(3)); + } - // Assert - Assert.That(nodes, Has.Count.EqualTo(3)); - } - [Test] public void SelectPrecedingNodeInDocumentWithDoctype_ShouldReturnNode() { // Arrange - const string html = - @" + const string html = + @"
    "; - var parser = new HtmlParser(); - var document = parser.ParseDocument(html); + var parser = new HtmlParser(); + var document = parser.ParseDocument(html); + + // Act + var node = document.DocumentElement.SelectSingleNode("//div/preceding::span"); + + // Assert + Assert.That(node, Is.Not.Null); + } + + [Test] + public void SelectNodeWithNamespace_ShouldReturnNode() + { + // Arrange + var xml = "https://www.test.com/de/accounts/profileweekly0.4"; + var parser = new XmlParser(); + var doc = parser.ParseDocument(xml); + var namespaceManager = new XmlNamespaceManager(new NameTable()); + + namespaceManager.AddNamespace("xhtml", "http://www.w3.org/1999/xhtml"); + namespaceManager.AddNamespace("d", "http://www.sitemaps.org/schemas/sitemap/0.9"); - // Act - var node = document.DocumentElement.SelectSingleNode("//div/preceding::span"); + // Act + var node = doc.DocumentElement.SelectSingleNode("/urlset/url/link"); - // Assert - Assert.That(node, Is.Not.Null); + // Assert + Assert.IsNotNull(node); + Assert.That(node.NodeName, Is.EqualTo("xhtml:link")); } - } + } } diff --git a/src/AngleSharp.XPath/Extensions.cs b/src/AngleSharp.XPath/Extensions.cs index e13251d..3c57b2a 100644 --- a/src/AngleSharp.XPath/Extensions.cs +++ b/src/AngleSharp.XPath/Extensions.cs @@ -1,40 +1,34 @@ +using AngleSharp.Dom; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Xml; +using System.Xml.XPath; + namespace AngleSharp.XPath { - using AngleSharp.Dom; - using System; - using System.Collections.Generic; - using System.Diagnostics; - using System.Xml; - using System.Xml.XPath; - /// /// Hosts the extension methods for XPath parsing. /// public static class Extensions - { + { /// /// Creates a new navigator for the given document. /// /// The document to extend. /// The navigator for XPath expressions. - public static XPathNavigator CreateNavigator(this IDocument document) - { + public static XPathNavigator CreateNavigator(this IDocument document) + { var doc = document ?? throw new ArgumentNullException(nameof(document)); return new HtmlDocumentNavigator(doc, doc.DocumentElement); - } - - [DebuggerStepThrough] - internal static String GetOrAdd(this XmlNameTable table, String array) - { - var s = table.Get(array); + } - if (s == null) - { - return table.Add(array); - } - - return s; - } + [DebuggerStepThrough] + internal static string GetOrAdd(this XmlNameTable table, string array) + { + var s = table.Get(array); + return s ?? table.Add(array); + } /// /// Selects a single node (or returns null) matching the expression. @@ -43,21 +37,14 @@ internal static String GetOrAdd(this XmlNameTable table, String array) /// The XPath expression. /// The node matching query, if any. /// Throws if or is null - public static INode SelectSingleNode(this IElement element, String xpath) - { + public static INode SelectSingleNode(this IElement element, string xpath) + { var el = element ?? throw new ArgumentNullException(nameof(element)); var xp = xpath ?? throw new ArgumentNullException(nameof(xpath)); - var nav = new HtmlDocumentNavigator(el.Owner, el); - var it = nav.Select(xp); - - if (it.MoveNext()) - { - var node = (HtmlDocumentNavigator)it.Current; - return node.CurrentNode; - } - - return null; - } + var nav = new HtmlDocumentNavigator(el.Owner, el); + var it = nav.SelectSingleNode(xp); + return ((HtmlDocumentNavigator) it)?.CurrentNode; + } /// /// Selects a list of nodes matching the expression. @@ -66,22 +53,24 @@ public static INode SelectSingleNode(this IElement element, String xpath) /// The XPath expression. /// List of nodes matching query. /// Throws if or is null - public static List SelectNodes(this IElement element, String xpath) + public static List SelectNodes(this IElement element, string xpath) { var el = element ?? throw new ArgumentNullException(nameof(element)); var xp = xpath ?? throw new ArgumentNullException(nameof(xpath)); var nav = new HtmlDocumentNavigator(el.Owner, el); - var it = nav.Select(xp); - var result = new List(); + var it = nav.Select(xp); + var result = new List(); - while (it.MoveNext()) - { - var naviagtor = (HtmlDocumentNavigator) it.Current; - var e = naviagtor.CurrentNode; - result.Add(e); - } + while (it.MoveNext()) + { + // ReSharper disable once IdentifierTypo + var naviagtor = (HtmlDocumentNavigator) it.Current; + // ReSharper disable once PossibleNullReferenceException + var e = naviagtor.CurrentNode; + result.Add(e); + } - return result; - } - } + return result; + } + } } diff --git a/src/AngleSharp.XPath/HtmlDocumentNavigable.cs b/src/AngleSharp.XPath/HtmlDocumentNavigable.cs index 8f174c3..2023fb8 100644 --- a/src/AngleSharp.XPath/HtmlDocumentNavigable.cs +++ b/src/AngleSharp.XPath/HtmlDocumentNavigable.cs @@ -1,9 +1,9 @@ +using AngleSharp.Dom; +using System; +using System.Xml.XPath; + namespace AngleSharp.XPath { - using AngleSharp.Dom; - using System; - using System.Xml.XPath; - /// public class HtmlDocumentNavigable : IXPathNavigable { diff --git a/src/AngleSharp.XPath/HtmlDocumentNavigator.cs b/src/AngleSharp.XPath/HtmlDocumentNavigator.cs index fb182a3..494ba3b 100644 --- a/src/AngleSharp.XPath/HtmlDocumentNavigator.cs +++ b/src/AngleSharp.XPath/HtmlDocumentNavigator.cs @@ -1,33 +1,32 @@ +using AngleSharp.Dom; +using System; +using System.Xml; +using System.Xml.XPath; + namespace AngleSharp.XPath { - using AngleSharp.Dom; - using System; - using System.Xml; - using System.Xml.XPath; - /// public class HtmlDocumentNavigator : XPathNavigator { private readonly IDocument _document; - private readonly NameTable _nameTable; private INode _currentNode; - private Int32 _attrIndex; + private int _attrIndex; /// /// Creates a new XPath navigator for the given document using the provided root node. /// /// The document to navigate. /// The node to start navigation. - public HtmlDocumentNavigator(IDocument document, INode currentNode) + public HtmlDocumentNavigator(IDocument document, INode currentNode) { _document = document ?? throw new ArgumentNullException(nameof(document)); - _nameTable = new NameTable(); + NameTable = new NameTable(); _currentNode = currentNode ?? throw new ArgumentNullException(nameof(currentNode)); - _attrIndex = -1; - } + _attrIndex = -1; + } /// - public override String BaseURI => _document.BaseUri; + public override string BaseURI => _document.BaseUri; /// /// Gets the currently selected node. @@ -40,38 +39,37 @@ public HtmlDocumentNavigator(IDocument document, INode currentNode) private IElement CurrentElement => CurrentNode as IElement; /// - public override Boolean HasAttributes => CurrentElement != null && CurrentElement.Attributes.Length > 0; + public override bool HasAttributes => CurrentElement != null && CurrentElement.Attributes.Length > 0; /// - public override Boolean IsEmptyElement => !_currentNode.HasChildNodes; + public override bool IsEmptyElement => !_currentNode.HasChildNodes; /// - public override String LocalName - { - get - { - if (_attrIndex != -1) - { - return NameTable.GetOrAdd(CurrentElement.Attributes[_attrIndex].Name); - } - - if (CurrentNode is IElement) - { - return NameTable.GetOrAdd(CurrentElement.LocalName); - } + public override string LocalName => + _attrIndex != -1 + ? NameTable.GetOrAdd(CurrentElement.Attributes[_attrIndex].LocalName) + : NameTable.GetOrAdd(CurrentNode is IElement e ? e.LocalName : string.Empty); - return NameTable.GetOrAdd(CurrentNode.NodeName); - } - } + /// + public override string Name => + _attrIndex != -1 + ? NameTable.GetOrAdd(CurrentElement.Attributes[_attrIndex].Name) + : NameTable.GetOrAdd(_currentNode.NodeName); /// - public override String Name => NameTable.GetOrAdd(_currentNode.NodeName); + public override string NamespaceURI => string.Empty + /*_attrIndex != -1 + ? NameTable.GetOrAdd(CurrentElement.Attributes[_attrIndex].NamespaceUri ?? string.Empty) + : NameTable.GetOrAdd(CurrentElement?.NamespaceUri ?? string.Empty)*/; /// - public override String NamespaceURI => String.Empty;// NameTable.GetOrAdd(CurrentElement?.NamespaceUri ?? string.Empty); + public override string Prefix => + _attrIndex != 1 + ? NameTable.GetOrAdd(CurrentElement.Attributes[_attrIndex].Prefix ?? string.Empty) + : NameTable.GetOrAdd(CurrentElement?.Prefix ?? string.Empty); /// - public override XmlNameTable NameTable => _nameTable; + public override XmlNameTable NameTable { get; } /// public override XPathNodeType NodeType @@ -94,14 +92,9 @@ public override XPathNodeType NodeType case Dom.NodeType.DocumentType: return XPathNodeType.Element; - - case Dom.NodeType.Element: - if (_attrIndex != -1) - { - return XPathNodeType.Attribute; - } - return XPathNodeType.Element; + case Dom.NodeType.Element: + return _attrIndex != -1 ? XPathNodeType.Attribute : XPathNodeType.Element; case Dom.NodeType.ProcessingInstruction: return XPathNodeType.ProcessingInstruction; @@ -120,10 +113,7 @@ public override XPathNodeType NodeType } /// - public override String Prefix => String.Empty;// _nameTable.GetOrAdd(CurrentElement?.Prefix ?? string.Empty); - - /// - public override String Value + public override string Value { get { @@ -152,14 +142,9 @@ public override String Value return documentType.Name; case Dom.NodeType.Element: - if (_attrIndex != -1) - { - return CurrentElement.Attributes[_attrIndex].Value; - } + return _attrIndex != -1 ? CurrentElement.Attributes[_attrIndex].Value : _currentNode.TextContent; - return _currentNode.TextContent; - - case Dom.NodeType.Entity: + case Dom.NodeType.Entity: return _currentNode.TextContent; case Dom.NodeType.EntityReference: @@ -188,7 +173,7 @@ public override XPathNavigator Clone() } /// - public override Boolean IsSamePosition(XPathNavigator other) + public override bool IsSamePosition(XPathNavigator other) { if (!(other is HtmlDocumentNavigator navigator)) { @@ -199,7 +184,7 @@ public override Boolean IsSamePosition(XPathNavigator other) } /// - public override Boolean MoveTo(XPathNavigator other) + public override bool MoveTo(XPathNavigator other) { if (!(other is HtmlDocumentNavigator navigator)) { @@ -217,7 +202,7 @@ public override Boolean MoveTo(XPathNavigator other) } /// - public override Boolean MoveToFirstAttribute() + public override bool MoveToFirstAttribute() { if (HasAttributes) { @@ -229,7 +214,7 @@ public override Boolean MoveToFirstAttribute() } /// - public override Boolean MoveToFirstChild() + public override bool MoveToFirstChild() { if (_currentNode.FirstChild == null) { @@ -241,13 +226,13 @@ public override Boolean MoveToFirstChild() } /// - public override Boolean MoveToFirstNamespace(XPathNamespaceScope namespaceScope) + public override bool MoveToFirstNamespace(XPathNamespaceScope namespaceScope) { return false; } /// - public override Boolean MoveToId(String id) + public override bool MoveToId(string id) { var elementById = _document.GetElementById(id); @@ -261,7 +246,7 @@ public override Boolean MoveToId(String id) } /// - public override Boolean MoveToNext() + public override bool MoveToNext() { if (_currentNode.NextSibling == null) { @@ -273,7 +258,7 @@ public override Boolean MoveToNext() } /// - public override Boolean MoveToNextAttribute() + public override bool MoveToNextAttribute() { if (CurrentElement == null) { @@ -290,13 +275,13 @@ public override Boolean MoveToNextAttribute() } /// - public override Boolean MoveToNextNamespace(XPathNamespaceScope namespaceScope) + public override bool MoveToNextNamespace(XPathNamespaceScope namespaceScope) { return false; } /// - public override Boolean MoveToParent() + public override bool MoveToParent() { if (_currentNode.Parent == null) { @@ -308,7 +293,7 @@ public override Boolean MoveToParent() } /// - public override Boolean MoveToPrevious() + public override bool MoveToPrevious() { if (_currentNode.PreviousSibling == null) { @@ -322,7 +307,7 @@ public override Boolean MoveToPrevious() /// public override void MoveToRoot() { - _currentNode = _document.DocumentElement; + _currentNode = _document; } - } + } } From ed91f5400bb3e599c0c353cef174be8f2f116c12 Mon Sep 17 00:00:00 2001 From: Denis Ivanov Date: Sun, 29 Dec 2019 15:27:17 +0400 Subject: [PATCH 2/8] #18 Namespace support in XPath. --- .../HtmlDocumentNavigatorTests.cs | 20 +++++++- src/AngleSharp.XPath/Extensions.cs | 47 +++++++++++++++---- src/AngleSharp.XPath/HtmlDocumentNavigator.cs | 25 +++++++--- 3 files changed, 76 insertions(+), 16 deletions(-) diff --git a/src/AngleSharp.XPath.Tests/HtmlDocumentNavigatorTests.cs b/src/AngleSharp.XPath.Tests/HtmlDocumentNavigatorTests.cs index a2cf8a0..2c017ad 100644 --- a/src/AngleSharp.XPath.Tests/HtmlDocumentNavigatorTests.cs +++ b/src/AngleSharp.XPath.Tests/HtmlDocumentNavigatorTests.cs @@ -65,7 +65,23 @@ public void SelectPrecedingNodeInDocumentWithDoctype_ShouldReturnNode() } [Test] - public void SelectNodeWithNamespace_ShouldReturnNode() + public void SelectSingleNode_IgnoreNamespaces_ShouldReturnNode() + { + // Arrange + var xml = "https://www.test.com/de/accounts/profileweekly0.4"; + var parser = new XmlParser(); + var doc = parser.ParseDocument(xml); + + // Act + var node = doc.DocumentElement.SelectSingleNode("/urlset/url/link"); + + // Assert + Assert.IsNotNull(node); + Assert.That(node.NodeName, Is.EqualTo("xhtml:link")); + } + + [Test] + public void SelectSingleNode_DontIgnoreNamespaces_ShouldReturnNode() { // Arrange var xml = "https://www.test.com/de/accounts/profileweekly0.4"; @@ -77,7 +93,7 @@ public void SelectNodeWithNamespace_ShouldReturnNode() namespaceManager.AddNamespace("d", "http://www.sitemaps.org/schemas/sitemap/0.9"); // Act - var node = doc.DocumentElement.SelectSingleNode("/urlset/url/link"); + var node = doc.DocumentElement.SelectSingleNode("/d:urlset/d:url/xhtml:link", namespaceManager, false); // Assert Assert.IsNotNull(node); diff --git a/src/AngleSharp.XPath/Extensions.cs b/src/AngleSharp.XPath/Extensions.cs index 3c57b2a..9fa707f 100644 --- a/src/AngleSharp.XPath/Extensions.cs +++ b/src/AngleSharp.XPath/Extensions.cs @@ -16,11 +16,12 @@ public static class Extensions /// Creates a new navigator for the given document. /// /// The document to extend. + /// /// The navigator for XPath expressions. - public static XPathNavigator CreateNavigator(this IDocument document) + public static XPathNavigator CreateNavigator(this IDocument document, bool ignoreNamespaces = true) { var doc = document ?? throw new ArgumentNullException(nameof(document)); - return new HtmlDocumentNavigator(doc, doc.DocumentElement); + return new HtmlDocumentNavigator(doc, doc.DocumentElement, ignoreNamespaces); } [DebuggerStepThrough] @@ -35,14 +36,29 @@ internal static string GetOrAdd(this XmlNameTable table, string array) /// /// The element to start looking from. /// The XPath expression. + /// /// The node matching query, if any. /// Throws if or is null - public static INode SelectSingleNode(this IElement element, string xpath) + public static INode SelectSingleNode(this IElement element, string xpath, bool ignoreNamespaces = true) + { + return element.SelectSingleNode(xpath, new XmlNamespaceManager(new NameTable()), ignoreNamespaces); + } + + /// + /// Selects a single node (or returns null) matching the expression. + /// + /// The element to start looking from. + /// The XPath expression. + /// + /// + /// The node matching query, if any. + /// Throws if or is null + public static INode SelectSingleNode(this IElement element, string xpath, IXmlNamespaceResolver resolver, bool ignoreNamespaces = true) { var el = element ?? throw new ArgumentNullException(nameof(element)); var xp = xpath ?? throw new ArgumentNullException(nameof(xpath)); - var nav = new HtmlDocumentNavigator(el.Owner, el); - var it = nav.SelectSingleNode(xp); + var nav = new HtmlDocumentNavigator(el.Owner, el, ignoreNamespaces); + var it = nav.SelectSingleNode(xp, resolver ?? new XmlNamespaceManager(new NameTable())); return ((HtmlDocumentNavigator) it)?.CurrentNode; } @@ -51,14 +67,29 @@ public static INode SelectSingleNode(this IElement element, string xpath) /// /// The element to start looking from. /// The XPath expression. + /// + /// List of nodes matching query. + /// Throws if or is null + public static List SelectNodes(this IElement element, string xpath, bool ignoreNamespaces = true) + { + return element.SelectNodes(xpath, new XmlNamespaceManager(new NameTable()), ignoreNamespaces); + } + + /// + /// Selects a list of nodes matching the expression. + /// + /// The element to start looking from. + /// The XPath expression. + /// + /// /// List of nodes matching query. /// Throws if or is null - public static List SelectNodes(this IElement element, string xpath) + public static List SelectNodes(this IElement element, string xpath, IXmlNamespaceResolver resolver, bool ignoreNamespaces = true) { var el = element ?? throw new ArgumentNullException(nameof(element)); var xp = xpath ?? throw new ArgumentNullException(nameof(xpath)); - var nav = new HtmlDocumentNavigator(el.Owner, el); - var it = nav.Select(xp); + var nav = new HtmlDocumentNavigator(el.Owner, el, ignoreNamespaces); + var it = nav.Select(xp, resolver ?? new XmlNamespaceManager(new NameTable())); var result = new List(); while (it.MoveNext()) diff --git a/src/AngleSharp.XPath/HtmlDocumentNavigator.cs b/src/AngleSharp.XPath/HtmlDocumentNavigator.cs index 494ba3b..3638473 100644 --- a/src/AngleSharp.XPath/HtmlDocumentNavigator.cs +++ b/src/AngleSharp.XPath/HtmlDocumentNavigator.cs @@ -11,18 +11,21 @@ public class HtmlDocumentNavigator : XPathNavigator private readonly IDocument _document; private INode _currentNode; private int _attrIndex; + private readonly bool _ignoreNamespaces; /// /// Creates a new XPath navigator for the given document using the provided root node. /// /// The document to navigate. /// The node to start navigation. - public HtmlDocumentNavigator(IDocument document, INode currentNode) + /// + public HtmlDocumentNavigator(IDocument document, INode currentNode, bool ignoreNamespaces) { _document = document ?? throw new ArgumentNullException(nameof(document)); NameTable = new NameTable(); _currentNode = currentNode ?? throw new ArgumentNullException(nameof(currentNode)); _attrIndex = -1; + _ignoreNamespaces = ignoreNamespaces; } /// @@ -57,10 +60,20 @@ public HtmlDocumentNavigator(IDocument document, INode currentNode) : NameTable.GetOrAdd(_currentNode.NodeName); /// - public override string NamespaceURI => string.Empty - /*_attrIndex != -1 - ? NameTable.GetOrAdd(CurrentElement.Attributes[_attrIndex].NamespaceUri ?? string.Empty) - : NameTable.GetOrAdd(CurrentElement?.NamespaceUri ?? string.Empty)*/; + public override string NamespaceURI + { + get + { + if (_ignoreNamespaces) + { + return string.Empty; + } + + return _attrIndex != -1 + ? NameTable.GetOrAdd(CurrentElement.Attributes[_attrIndex].NamespaceUri ?? string.Empty) + : NameTable.GetOrAdd(CurrentElement?.NamespaceUri ?? string.Empty); + } + } /// public override string Prefix => @@ -169,7 +182,7 @@ public override string Value /// public override XPathNavigator Clone() { - return new HtmlDocumentNavigator(_document, _currentNode); + return new HtmlDocumentNavigator(_document, _currentNode, _ignoreNamespaces); } /// From d9744833b0672b9ed3f4adecaa72c9985ff072a3 Mon Sep 17 00:00:00 2001 From: Denis Ivanov Date: Mon, 6 Jan 2020 21:13:02 +0400 Subject: [PATCH 3/8] New version 1.1.6. --- src/AngleSharp.XPath.Tests/AngleSharp.XPath.Tests.csproj | 2 +- src/AngleSharp.XPath/AngleSharp.XPath.csproj | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/AngleSharp.XPath.Tests/AngleSharp.XPath.Tests.csproj b/src/AngleSharp.XPath.Tests/AngleSharp.XPath.Tests.csproj index 0634ae7..13af12b 100644 --- a/src/AngleSharp.XPath.Tests/AngleSharp.XPath.Tests.csproj +++ b/src/AngleSharp.XPath.Tests/AngleSharp.XPath.Tests.csproj @@ -3,7 +3,7 @@ - + diff --git a/src/AngleSharp.XPath/AngleSharp.XPath.csproj b/src/AngleSharp.XPath/AngleSharp.XPath.csproj index d8d81d8..010bf10 100644 --- a/src/AngleSharp.XPath/AngleSharp.XPath.csproj +++ b/src/AngleSharp.XPath/AngleSharp.XPath.csproj @@ -1,13 +1,13 @@  - 1.1.5 + 1.1.6 Denis Ivanov AngleSharp.XPath - 1.1.5 + 1.1.6 AngleSharp.XPath AngleSharp.XPath netstandard2.0 - 1.1.5 + 1.1.6 XPath support for AngleSharp https://github.com/AngleSharp/AngleSharp.XPath/ MIT From cd266a31b5fcbaf4dc9c0b4beb3b72503fcf1a49 Mon Sep 17 00:00:00 2001 From: Denis Ivanov Date: Wed, 1 Apr 2020 20:28:04 +0400 Subject: [PATCH 4/8] Update AngleSharp to 0.14.0. --- src/AngleSharp.XPath.Tests/AngleSharp.XPath.Tests.csproj | 6 +++--- src/AngleSharp.XPath/AngleSharp.XPath.csproj | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/AngleSharp.XPath.Tests/AngleSharp.XPath.Tests.csproj b/src/AngleSharp.XPath.Tests/AngleSharp.XPath.Tests.csproj index 13af12b..01e6733 100644 --- a/src/AngleSharp.XPath.Tests/AngleSharp.XPath.Tests.csproj +++ b/src/AngleSharp.XPath.Tests/AngleSharp.XPath.Tests.csproj @@ -1,9 +1,9 @@  - - + + - + diff --git a/src/AngleSharp.XPath/AngleSharp.XPath.csproj b/src/AngleSharp.XPath/AngleSharp.XPath.csproj index 010bf10..e802bd8 100644 --- a/src/AngleSharp.XPath/AngleSharp.XPath.csproj +++ b/src/AngleSharp.XPath/AngleSharp.XPath.csproj @@ -1,13 +1,13 @@  - 1.1.6 + 1.1.7 Denis Ivanov AngleSharp.XPath - 1.1.6 + 1.1.7 AngleSharp.XPath AngleSharp.XPath netstandard2.0 - 1.1.6 + 1.1.7 XPath support for AngleSharp https://github.com/AngleSharp/AngleSharp.XPath/ MIT @@ -19,7 +19,7 @@ - + From dfd9d33a7eb0201de6a7229ee13a4adcb37f926c Mon Sep 17 00:00:00 2001 From: Denis Ivanov Date: Tue, 15 Jun 2021 11:41:04 +0400 Subject: [PATCH 5/8] Ability to select attributes. (#31) --- .../AngleSharp.XPath.Tests.csproj | 10 ++--- .../HtmlDocumentNavigatorTests.cs | 17 +++++++++ src/AngleSharp.XPath/AngleSharp.XPath.csproj | 11 +++--- src/AngleSharp.XPath/HtmlDocumentNavigator.cs | 38 ++++++++++--------- 4 files changed, 49 insertions(+), 27 deletions(-) diff --git a/src/AngleSharp.XPath.Tests/AngleSharp.XPath.Tests.csproj b/src/AngleSharp.XPath.Tests/AngleSharp.XPath.Tests.csproj index 01e6733..d2e16fb 100644 --- a/src/AngleSharp.XPath.Tests/AngleSharp.XPath.Tests.csproj +++ b/src/AngleSharp.XPath.Tests/AngleSharp.XPath.Tests.csproj @@ -1,15 +1,15 @@  - - - - + + + + - netcoreapp3.0 + net5.0 false \ No newline at end of file diff --git a/src/AngleSharp.XPath.Tests/HtmlDocumentNavigatorTests.cs b/src/AngleSharp.XPath.Tests/HtmlDocumentNavigatorTests.cs index 2c017ad..09862d3 100644 --- a/src/AngleSharp.XPath.Tests/HtmlDocumentNavigatorTests.cs +++ b/src/AngleSharp.XPath.Tests/HtmlDocumentNavigatorTests.cs @@ -99,5 +99,22 @@ public void SelectSingleNode_DontIgnoreNamespaces_ShouldReturnNode() Assert.IsNotNull(node); Assert.That(node.NodeName, Is.EqualTo("xhtml:link")); } + + [Test] + public void SelectNodes_CanReturnAttribute() + { + // Arrange + var html = "
    hello world
    "; + var parser = new HtmlParser(); + var doc = parser.ParseDocument(html); + + // Act + var nodes = doc.DocumentElement.SelectNodes("//@*"); + + // Assert + Assert.IsNotNull(nodes); + Assert.That(nodes, Has.Count.EqualTo(2)); + Assert.That(nodes, Is.All.InstanceOf()); + } } } diff --git a/src/AngleSharp.XPath/AngleSharp.XPath.csproj b/src/AngleSharp.XPath/AngleSharp.XPath.csproj index e802bd8..17f39fd 100644 --- a/src/AngleSharp.XPath/AngleSharp.XPath.csproj +++ b/src/AngleSharp.XPath/AngleSharp.XPath.csproj @@ -1,13 +1,13 @@  - 1.1.7 + 2.0.0-alpha-1 + 2.0.0 + 2.0.0-alpha-1 Denis Ivanov AngleSharp.XPath - 1.1.7 AngleSharp.XPath AngleSharp.XPath - netstandard2.0 - 1.1.7 + net5.0 XPath support for AngleSharp https://github.com/AngleSharp/AngleSharp.XPath/ MIT @@ -16,10 +16,11 @@ Key.snk true True + https://raw.githubusercontent.com/AngleSharp/AngleSharp.XPath/master/logo.png - + diff --git a/src/AngleSharp.XPath/HtmlDocumentNavigator.cs b/src/AngleSharp.XPath/HtmlDocumentNavigator.cs index 3638473..73e34ac 100644 --- a/src/AngleSharp.XPath/HtmlDocumentNavigator.cs +++ b/src/AngleSharp.XPath/HtmlDocumentNavigator.cs @@ -10,7 +10,6 @@ public class HtmlDocumentNavigator : XPathNavigator { private readonly IDocument _document; private INode _currentNode; - private int _attrIndex; private readonly bool _ignoreNamespaces; /// @@ -24,7 +23,6 @@ public HtmlDocumentNavigator(IDocument document, INode currentNode, bool ignoreN _document = document ?? throw new ArgumentNullException(nameof(document)); NameTable = new NameTable(); _currentNode = currentNode ?? throw new ArgumentNullException(nameof(currentNode)); - _attrIndex = -1; _ignoreNamespaces = ignoreNamespaces; } @@ -49,14 +47,14 @@ public HtmlDocumentNavigator(IDocument document, INode currentNode, bool ignoreN /// public override string LocalName => - _attrIndex != -1 - ? NameTable.GetOrAdd(CurrentElement.Attributes[_attrIndex].LocalName) + CurrentNode is IAttr attr + ? attr.LocalName : NameTable.GetOrAdd(CurrentNode is IElement e ? e.LocalName : string.Empty); /// public override string Name => - _attrIndex != -1 - ? NameTable.GetOrAdd(CurrentElement.Attributes[_attrIndex].Name) + CurrentNode is IAttr attr + ? NameTable.GetOrAdd(attr.Name) : NameTable.GetOrAdd(_currentNode.NodeName); /// @@ -69,16 +67,16 @@ public override string NamespaceURI return string.Empty; } - return _attrIndex != -1 - ? NameTable.GetOrAdd(CurrentElement.Attributes[_attrIndex].NamespaceUri ?? string.Empty) + return CurrentNode is IAttr attr + ? NameTable.GetOrAdd(attr.NamespaceUri ?? string.Empty) : NameTable.GetOrAdd(CurrentElement?.NamespaceUri ?? string.Empty); } } /// public override string Prefix => - _attrIndex != 1 - ? NameTable.GetOrAdd(CurrentElement.Attributes[_attrIndex].Prefix ?? string.Empty) + CurrentNode is IAttr attr + ? NameTable.GetOrAdd(attr.Prefix ?? string.Empty) : NameTable.GetOrAdd(CurrentElement?.Prefix ?? string.Empty); /// @@ -107,7 +105,7 @@ public override XPathNodeType NodeType return XPathNodeType.Element; case Dom.NodeType.Element: - return _attrIndex != -1 ? XPathNodeType.Attribute : XPathNodeType.Element; + return XPathNodeType.Element; case Dom.NodeType.ProcessingInstruction: return XPathNodeType.ProcessingInstruction; @@ -155,7 +153,7 @@ public override string Value return documentType.Name; case Dom.NodeType.Element: - return _attrIndex != -1 ? CurrentElement.Attributes[_attrIndex].Value : _currentNode.TextContent; + return _currentNode.TextContent; case Dom.NodeType.Entity: return _currentNode.TextContent; @@ -207,7 +205,6 @@ public override bool MoveTo(XPathNavigator other) if (navigator._document == _document) { _currentNode = navigator._currentNode; - _attrIndex = navigator._attrIndex; return true; } @@ -218,8 +215,8 @@ public override bool MoveTo(XPathNavigator other) public override bool MoveToFirstAttribute() { if (HasAttributes) - { - _attrIndex = 0; + { + _currentNode = CurrentElement.Attributes[0]; return true; } @@ -278,12 +275,19 @@ public override bool MoveToNextAttribute() return false; } - if (_attrIndex >= CurrentElement.Attributes.Length - 1) + if (!(CurrentNode is IAttr attr)) + { + return false; + } + + var attrIndex = attr.OwnerElement.Attributes.Index(attr); + + if (attrIndex >= CurrentElement.Attributes.Length - 1) { return false; } - _attrIndex++; + _currentNode = attr.OwnerElement.Attributes[attrIndex + 1]; return true; } From cbf85d2ced1cc4f2d1de57cfbdbe04508d7505ec Mon Sep 17 00:00:00 2001 From: Denis Ivanov Date: Tue, 15 Jun 2021 14:45:18 +0400 Subject: [PATCH 6/8] Lower case tag names for XHTML. --- .../HtmlDocumentNavigatorTests.cs | 30 +++++++++++++++++++ src/AngleSharp.XPath/AngleSharp.XPath.csproj | 6 ++-- src/AngleSharp.XPath/HtmlDocumentNavigator.cs | 21 ++++++++++--- 3 files changed, 50 insertions(+), 7 deletions(-) diff --git a/src/AngleSharp.XPath.Tests/HtmlDocumentNavigatorTests.cs b/src/AngleSharp.XPath.Tests/HtmlDocumentNavigatorTests.cs index 09862d3..d87508f 100644 --- a/src/AngleSharp.XPath.Tests/HtmlDocumentNavigatorTests.cs +++ b/src/AngleSharp.XPath.Tests/HtmlDocumentNavigatorTests.cs @@ -3,6 +3,7 @@ using AngleSharp.Html.Parser; using NUnit.Framework; using System.Threading.Tasks; +using AngleSharp.Dom; namespace AngleSharp.XPath.Tests { @@ -116,5 +117,34 @@ public void SelectNodes_CanReturnAttribute() Assert.That(nodes, Has.Count.EqualTo(2)); Assert.That(nodes, Is.All.InstanceOf()); } + + [Test] + public void TestNameXPathFunctionOnXMLDoc() + { + // Arrange + var xml = @"Test

    Test

    "; + var angleSharpXmlDoc = new XmlParser().ParseDocument(xml); + + // Act + var xmlNav = angleSharpXmlDoc.CreateNavigator(); + + // Assert + Assert.AreEqual(TagNames.Html, xmlNav.Evaluate("name()")); + } + + [Test] + public void TestNameXPathFunctionOnHTMLDoc() + { + // Arrange + var html = @"Test

    Test

    "; + + var angleSharpHtmlDoc = new HtmlParser().ParseDocument(html); + + // Act + var htmlNav = angleSharpHtmlDoc.CreateNavigator(); + + // Assert + Assert.AreEqual(TagNames.Html, htmlNav.Evaluate("name()")); + } } } diff --git a/src/AngleSharp.XPath/AngleSharp.XPath.csproj b/src/AngleSharp.XPath/AngleSharp.XPath.csproj index 17f39fd..cab2059 100644 --- a/src/AngleSharp.XPath/AngleSharp.XPath.csproj +++ b/src/AngleSharp.XPath/AngleSharp.XPath.csproj @@ -1,13 +1,13 @@  - 2.0.0-alpha-1 + 2.0.0-alpha-2 2.0.0 - 2.0.0-alpha-1 + 2.0.0-alpha-2 Denis Ivanov AngleSharp.XPath AngleSharp.XPath AngleSharp.XPath - net5.0 + netstandard2.0;net46;net461;net472 XPath support for AngleSharp https://github.com/AngleSharp/AngleSharp.XPath/ MIT diff --git a/src/AngleSharp.XPath/HtmlDocumentNavigator.cs b/src/AngleSharp.XPath/HtmlDocumentNavigator.cs index 73e34ac..1178c1a 100644 --- a/src/AngleSharp.XPath/HtmlDocumentNavigator.cs +++ b/src/AngleSharp.XPath/HtmlDocumentNavigator.cs @@ -52,10 +52,23 @@ CurrentNode is IAttr attr : NameTable.GetOrAdd(CurrentNode is IElement e ? e.LocalName : string.Empty); /// - public override string Name => - CurrentNode is IAttr attr - ? NameTable.GetOrAdd(attr.Name) - : NameTable.GetOrAdd(_currentNode.NodeName); + public override string Name + { + get + { + if (CurrentNode is IAttr attr) + { + return NameTable.GetOrAdd(attr.Name); + } + + if (CurrentElement != null) + { + return NameTable.GetOrAdd(CurrentElement.LocalName); + } + + return NameTable.GetOrAdd(_currentNode.NodeName); + } + } /// public override string NamespaceURI From 6e0f82efa15b06c48493654937bc0df262a79288 Mon Sep 17 00:00:00 2001 From: Denis Ivanov Date: Tue, 15 Jun 2021 21:58:29 +0400 Subject: [PATCH 7/8] Fix MoveToParent for Attr. --- .../HtmlDocumentNavigatorTests.cs | 27 +++++++++++++++++++ src/AngleSharp.XPath/AngleSharp.XPath.csproj | 4 +-- src/AngleSharp.XPath/HtmlDocumentNavigator.cs | 13 ++++++++- 3 files changed, 41 insertions(+), 3 deletions(-) diff --git a/src/AngleSharp.XPath.Tests/HtmlDocumentNavigatorTests.cs b/src/AngleSharp.XPath.Tests/HtmlDocumentNavigatorTests.cs index d87508f..202e7fe 100644 --- a/src/AngleSharp.XPath.Tests/HtmlDocumentNavigatorTests.cs +++ b/src/AngleSharp.XPath.Tests/HtmlDocumentNavigatorTests.cs @@ -3,6 +3,7 @@ using AngleSharp.Html.Parser; using NUnit.Framework; using System.Threading.Tasks; +using System.Xml.XPath; using AngleSharp.Dom; namespace AngleSharp.XPath.Tests @@ -146,5 +147,31 @@ public void TestNameXPathFunctionOnHTMLDoc() // Assert Assert.AreEqual(TagNames.Html, htmlNav.Evaluate("name()")); } + + [Test] + public void MoveToParent_CallWhenCurrentNodeIsAttr_ShouldBeMovedToAttrOwnerElement() + { + // Arrange + var xml = @"foo"; + var parser = new XmlParser(); + var doc = parser.ParseDocument(xml); + var nav = doc.CreateNavigator(false); + nav.MoveToChild("root", ""); + + // Act + + if (nav.MoveToFirstAttribute()) + { + do + { + Assert.AreEqual(nav.NodeType, XPathNodeType.Attribute); + } + while (nav.MoveToNextAttribute()); + nav.MoveToParent(); + } + + // Assert + Assert.AreEqual(nav.Name, "root"); + } } } diff --git a/src/AngleSharp.XPath/AngleSharp.XPath.csproj b/src/AngleSharp.XPath/AngleSharp.XPath.csproj index cab2059..1defece 100644 --- a/src/AngleSharp.XPath/AngleSharp.XPath.csproj +++ b/src/AngleSharp.XPath/AngleSharp.XPath.csproj @@ -1,8 +1,8 @@  - 2.0.0-alpha-2 + 2.0.0-alpha-3 2.0.0 - 2.0.0-alpha-2 + 2.0.0-alpha-3 Denis Ivanov AngleSharp.XPath AngleSharp.XPath diff --git a/src/AngleSharp.XPath/HtmlDocumentNavigator.cs b/src/AngleSharp.XPath/HtmlDocumentNavigator.cs index 1178c1a..34d1051 100644 --- a/src/AngleSharp.XPath/HtmlDocumentNavigator.cs +++ b/src/AngleSharp.XPath/HtmlDocumentNavigator.cs @@ -293,11 +293,16 @@ public override bool MoveToNextAttribute() return false; } + if (attr.OwnerElement == null) + { + return false; + } + var attrIndex = attr.OwnerElement.Attributes.Index(attr); if (attrIndex >= CurrentElement.Attributes.Length - 1) { - return false; + return false; } _currentNode = attr.OwnerElement.Attributes[attrIndex + 1]; @@ -313,6 +318,12 @@ public override bool MoveToNextNamespace(XPathNamespaceScope namespaceScope) /// public override bool MoveToParent() { + if (CurrentNode is IAttr attr) + { + _currentNode = attr.OwnerElement; + return true; + } + if (_currentNode.Parent == null) { return false; From 36bd9001209a9f176700110eced3547f463aad8c Mon Sep 17 00:00:00 2001 From: Denis Ivanov Date: Wed, 1 Jun 2022 10:08:04 +0400 Subject: [PATCH 8/8] Update AngleSharp to 0.17.0. --- .../AngleSharp.XPath.Tests.csproj | 12 ++++++------ src/AngleSharp.XPath/AngleSharp.XPath.csproj | 12 ++++++++---- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/AngleSharp.XPath.Tests/AngleSharp.XPath.Tests.csproj b/src/AngleSharp.XPath.Tests/AngleSharp.XPath.Tests.csproj index d2e16fb..5a2cd1b 100644 --- a/src/AngleSharp.XPath.Tests/AngleSharp.XPath.Tests.csproj +++ b/src/AngleSharp.XPath.Tests/AngleSharp.XPath.Tests.csproj @@ -1,15 +1,15 @@  - - - - + + + + - net5.0 + net6.0 false - \ No newline at end of file + diff --git a/src/AngleSharp.XPath/AngleSharp.XPath.csproj b/src/AngleSharp.XPath/AngleSharp.XPath.csproj index 1defece..f116ac4 100644 --- a/src/AngleSharp.XPath/AngleSharp.XPath.csproj +++ b/src/AngleSharp.XPath/AngleSharp.XPath.csproj @@ -1,13 +1,13 @@  - 2.0.0-alpha-3 + 2.0.0 2.0.0 - 2.0.0-alpha-3 + 2.0.0 Denis Ivanov AngleSharp.XPath AngleSharp.XPath AngleSharp.XPath - netstandard2.0;net46;net461;net472 + netstandard2.0;net461;net472;net6.0 XPath support for AngleSharp https://github.com/AngleSharp/AngleSharp.XPath/ MIT @@ -20,7 +20,11 @@ - + + + + +