From ed765772b3787913022e47782a7f35bbd1d2b876 Mon Sep 17 00:00:00 2001 From: freeok <51998152+freeok@users.noreply.github.com> Date: Thu, 5 Dec 2024 16:29:33 +0800 Subject: [PATCH] =?UTF-8?q?:bug:=20=E4=BF=AE=E5=A4=8D=E6=9F=90=E4=BA=9B?= =?UTF-8?q?=E6=83=85=E5=86=B5=E4=B8=8B=E6=90=9C=E7=B4=A2=E5=A4=B1=E8=B4=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../sonovel/parse/SearchResultParser.java | 22 +++++++++---------- .../pcdd/sonovel/BookSourceQualityTest.java | 8 +++---- 2 files changed, 13 insertions(+), 17 deletions(-) diff --git a/src/main/java/com/pcdd/sonovel/parse/SearchResultParser.java b/src/main/java/com/pcdd/sonovel/parse/SearchResultParser.java index 7a1a125..17f78bd 100644 --- a/src/main/java/com/pcdd/sonovel/parse/SearchResultParser.java +++ b/src/main/java/com/pcdd/sonovel/parse/SearchResultParser.java @@ -15,7 +15,6 @@ import org.jsoup.select.Elements; import java.util.*; -import java.util.stream.Stream; /** * @author pcdd @@ -67,24 +66,23 @@ public List parse(String keyword) { @SneakyThrows private List getSearchResults(String url, Document document) { - Rule.Search search = this.rule.getSearch(); + Rule.Search rule = this.rule.getSearch(); // 搜索结果页 DOM if (document == null) document = Jsoup.connect(url).timeout(TIMEOUT_MILLS).get(); - Elements elements = document.select(search.getResult()); - + Elements elements = document.select(rule.getResult()); List list = new ArrayList<>(); for (Element element : elements) { // jsoup 不支持一次性获取属性的值 - String href = element.select(search.getBookName()).attr("href"); - String bookName = element.select(search.getBookName()).text(); - String latestChapter = element.select(search.getLatestChapter()).text(); - String author = element.select(search.getAuthor()).text(); - String update = element.select(search.getUpdate()).text(); + String href = element.select(rule.getBookName()).attr("href"); + String bookName = element.select(rule.getBookName()).text(); + String latestChapter = element.select(rule.getLatestChapter()).text(); + String author = element.select(rule.getAuthor()).text(); + String update = element.select(rule.getUpdate()).text(); - // 如果存在任何一个字符串为空字符串(针对书源 1:排除第一个 tr 表头) - if (Stream.of(href, bookName, latestChapter, author, update).anyMatch(String::isEmpty)) continue; + // 针对书源 1:排除第一个 tr 表头 + if (bookName.isEmpty()) continue; SearchResult build = SearchResult.builder() .url(CrawlUtils.normalizeUrl(href, this.rule.getUrl())) @@ -100,4 +98,4 @@ private List getSearchResults(String url, Document document) { return list; } -} +} \ No newline at end of file diff --git a/src/test/java/com/pcdd/sonovel/BookSourceQualityTest.java b/src/test/java/com/pcdd/sonovel/BookSourceQualityTest.java index 732d69e..cac72da 100644 --- a/src/test/java/com/pcdd/sonovel/BookSourceQualityTest.java +++ b/src/test/java/com/pcdd/sonovel/BookSourceQualityTest.java @@ -73,7 +73,7 @@ static void generateMarkdown(String name, List> lists, Strin name, lists.get(0).size(), DateTime.now().toString(DatePattern.NORM_DATE_PATTERN))); - result.append(StrUtil.format("| 序号 | 书名 | 作者 {} 起点链接 |\n", s1)); + result.append(StrUtil.format("| 排名 | 书名 | 作者 {} 起点链接 |\n", s1)); result.append(StrUtil.format("| ---- | ---- | ---- {} ---- |\n", s2)); List list = lists.get(0); @@ -87,14 +87,12 @@ static void generateMarkdown(String name, List> lists, Strin foundBuilder.append(StrUtil.format("{} |", sq.getFound() ? "✅" : "❌")); } - String s = StrUtil.format("| {} | {} | {} | {} {} |\n", + result.append(StrUtil.format("| {} | {} | {} | {} {} |\n", i + 1, o.getBookName(), o.getAuthor(), foundBuilder, - o.getQiDianUrl()); - - result.append(s); + o.getQiDianUrl())); } Writer writer = new FileWriter(fileName);