From 1c6981297e9b1e723f510d69d9ce091a4262049f Mon Sep 17 00:00:00 2001
From: Tony
Date: Sun, 28 Apr 2024 05:55:46 +0800
Subject: [PATCH] fix(route): sohu mp (#15390)

* fix(route): sohu mp

* fix: remove unwanted element
---
Review notes (text in this section is ignored by `git am`):
- SUV cookie: only the cookie value is kept, so the request header no longer
  reads `SUV=SUV=...`; defaultSUV is used when the profile page sets no SUV.
- Nested lookups into the parsed page data tolerate missing levels, so the
  hard-coded defaults apply instead of a TypeError.
- The feed list is read back with the same tplCompKey that was sent.
- Line breaks of this patch were restored; the post-image hash on the `index`
  line was not regenerated.

 lib/routes/sohu/mp.ts | 279 ++++++++++++++++++++++++++++++++----------
 1 file changed, 216 insertions(+), 63 deletions(-)

diff --git a/lib/routes/sohu/mp.ts b/lib/routes/sohu/mp.ts
index 8a6ca2f05c6123..b822b5f7f6eff3 100644
--- a/lib/routes/sohu/mp.ts
+++ b/lib/routes/sohu/mp.ts
@@ -3,88 +3,241 @@ import { getCurrentPath } from '@/utils/helpers';
 const __dirname = getCurrentPath(import.meta.url);
 
 import cache from '@/utils/cache';
-import got from '@/utils/got';
-import { load } from 'cheerio';
+import ofetch from '@/utils/ofetch';
+import * as cheerio from 'cheerio';
 import { parseDate } from '@/utils/parse-date';
 import path from 'node:path';
 import { art } from '@/utils/render';
 
 export const route: Route = {
-    path: '/mp/:id',
+    path: '/mp/:xpt',
     categories: ['new-media'],
-    example: '/sohu/mp/119097',
-    parameters: { id: '搜狐号 ID' },
-    features: {
-        requireConfig: false,
-        requirePuppeteer: false,
-        antiCrawler: false,
-        supportBT: false,
-        supportPodcast: false,
-        supportScihub: false,
-    },
-    name: '更新',
+    example: '/sohu/mp/c29odXptdGhnbjZ3NEBzb2h1LmNvbQ==',
+    parameters: { xpt: '搜狐号 xpt ,可在URL中找到或搜狐号 ID' },
+    radar: [
+        {
+            source: ['mp.sohu.com/profile'],
+            target: (_params, url) => `/sohu/mp/${new URL(url).searchParams.get('xpt')}`,
+        },
+    ],
+    name: '最新',
     maintainers: ['HenryQW'],
     handler,
-    description: `1. 通过浏览器搜索相关搜狐号 \`果壳 site: mp.sohu.com\`。
-  2. 通过浏览器控制台执行 \`contentData.mkey\`,返回的即为搜狐号 ID。`,
+    description: `搜狐号 ID 可以通过以下方式获取:
+  1. 通过浏览器搜索相关搜狐号 \`果壳 site: mp.sohu.com\`。
+  2. 通过浏览器控制台执行 \`window.globalConst.mkeyConst_mkey\`,返回的即为搜狐号 ID。`,
 };
 
+/**
+ * Builds a random string by picking characters from a fixed alphabet of letters and digits.
+ *
+ * @param length - Number of characters to generate (defaults to 32).
+ * @returns The generated string.
+ */
+function randomString(length = 32) {
+    let r = '';
+    const e = 'ABCDEFGHJKMNPQRSTWXYZabcdefhijkmnprstwxyz2345678';
+    const n = e.length;
+    for (let i = 0; i < length; i++) {
+        r += e.charAt(Math.floor(Math.random() * n));
+    }
+    return r;
+}
+// Fallback SUV cookie value for when the profile page sets none; its prefix also seeds pageId.
+const defaultSUV = '1612268936507kas0gk';
+
+/**
+ * Loads an article page and fills in `author` and `description` on the feed item.
+ * The result is cached under the article link.
+ *
+ * @param item - Feed item with at least a `link`; it is mutated in place.
+ * @returns The same item, as resolved through `cache.tryGet`.
+ */
+function fetchArticle(item) {
+    return cache.tryGet(item.link, async () => {
+        const response = await ofetch(item.link);
+        const $ = cheerio.load(response);
+
+        $('.original-title, .lookall-box').remove();
+        item.author = item.author || $('span[data-role="original-link"] a').text();
+
+        // Video posts: render a player from the url/cover found in the inline script.
+        if (/window\.sohu_mp\.article_video/.test($('script').text())) {
+            const videoSrc = $('script')
+                .text()
+                .match(/\s*url: "(.*?)",/)?.[1];
+            item.description = art(path.join(__dirname, 'templates/video.art'), {
+                poster: $('script')
+                    .text()
+                    .match(/cover: "(.*?)",/)?.[1],
+                src: videoSrc,
+                type: videoSrc?.split('.').pop()?.toLowerCase(),
+            });
+        } else {
+            const article = $('#mp-editor');
+
+            article.find('#backsohucom, p[data-role="editor-name"]').each((i, e) => {
+                $(e).remove();
+            });
+
+            item.description = article.html();
+        }
+
+        return item;
+    });
+}
+
+/**
+ * Route entry point. Purely numeric parameters are treated as legacy 搜狐号 IDs;
+ * anything else is used as an `xpt` to load the profile page and query the feed API.
+ */
 async function handler(ctx) {
-    const id = ctx.req.param('id');
+    const xpt = ctx.req.param('xpt');
+    const isPureNumber = /^\d+$/.test(xpt);
+    if (isPureNumber) {
+        return legacyIdHandler(ctx);
+    }
+
+    const pageResponse = await ofetch.raw('https://mp.sohu.com/profile', {
+        query: {
+            xpt,
+        },
+    });
+    // Keep only the cookie value: the Set-Cookie entry starts with `SUV=<value>;` and the
+    // `SUV=` name is added back when the Cookie header is assembled below.
+    const suv =
+        pageResponse.headers
+            ?.getSetCookie()
+            .find((e) => e.startsWith('SUV='))
+            ?.split(';')[0]
+            .slice('SUV='.length) || defaultSUV;
+    const $ = cheerio.load(pageResponse._data);
+
+    const CBDRenderConst = JSON.parse(
+        $('script:contains("CBDRenderConst")')
+            .text()
+            .trim()
+            .match(/CBDRenderConst\s=\s(.*)/)?.[1] || '{}'
+    );
+    // Several scripts may mention contentData; the longest captured assignment is used.
+    const contentData = JSON.parse(
+        $('script:contains("contentData")')
+            .toArray()
+            .map(
+                (e) =>
+                    $(e)
+                        .text()
+                        .match(/contentData = (.*)/)?.[1]
+            )
+            .sort((a: any, b: any) => b.length - a.length)[0] || '{}'
+    );
+    const renderData = JSON.parse(
+        $('script:contains("column_2_text")')
+            .text()
+            .match(/renderData:\s(.*)/)?.[1] || '{}'
+    );
+    const globalConst = JSON.parse(
+        $('script:contains("globalConst")')
+            .text()
+            .match(/globalConst\s=\s(.*)/)?.[1] || '{}'
+    );
+    const originalRequest = JSON.parse(
+        $('script:contains("originalRequest")')
+            .text()
+            .match(/originalRequest\s=\s(.*)/)?.[1] || '{}'
+    );
+
+    // The parsed objects fall back to `{}` when a script is missing, so nested lookups
+    // must tolerate absent levels for the hard-coded defaults below to take effect.
+    const reqParam = renderData.param?.data2?.reqParam ?? {};
+    const reqContent = reqParam.content ?? {};
+    const tplCompKey = reqParam.tplCompKey || 'FeedSlideloadAuthor_2_0_pc_1655965929143_data2';
+
+    const blockData = await ofetch('https://odin.sohu.com/odin/api/blockdata', {
+        method: 'POST',
+        headers: {
+            Cookie: Object.entries({
+                SUV: suv,
+                itssohu: 'true',
+                reqtype: 'pc',
+                t: Date.now(),
+            })
+                .map(([key, value]) => `${key}=${value}`)
+                .join('; '),
+        },
+        body: {
+            pvId: CBDRenderConst.COMMONCONFIG?.pvId || `${Date.now()}_${randomString(7)}`,
+            pageId: `${Date.now()}_${defaultSUV.slice(0, -5)}_${randomString(3)}`,
+            mainContent: {
+                productType: contentData.businessType || '13',
+                productId: contentData.id || '324',
+                secureScore: contentData.secureScore || '5',
+                categoryId: contentData.categoryId || '47',
+                adTags: contentData.adTags || '11111111',
+                authorId: contentData.account?.id || 121_135_924,
+            },
+            resourceList: [
+                {
+                    tplCompKey,
+                    isServerRender: reqParam.isServerRender || false,
+                    isSingleAd: reqParam.isSingleAd || false,
+                    configSource: reqParam.configSource || 'mp',
+                    content: {
+                        productId: reqContent.productId || '325',
+                        productType: reqContent.productType || '13',
+                        size: 20,
+                        pro: renderData.param?.pro || '0,1,3,4,5',
+                        feedType: renderData.param?.feedType || 'XTOPIC_SYNTHETICAL',
+                        view: '',
+                        innerTag: reqContent.innerTag || 'work',
+                        spm: reqContent.spm || 'smpc.channel_248.block3_308_hHsK47_2_fd',
+                        page: 1,
+                        requestId: `${Date.now()}_${randomString(13)}_${contentData.id}`,
+                    },
+                    adInfo: {},
+                    context: {
+                        mkey: globalConst.mkeyConst_mkey, // legacy ID
+                    },
+                },
+            ],
+        },
+    });
+
+    const list = blockData.data[tplCompKey].list.map((item) => ({
+        title: item.title,
+        description: item.brief,
+        link: `https://www.sohu.com/a/${item.id}_${item.authorId}`,
+        author: item.authorName,
+        pubDate: parseDate(item.postTime, 'x'),
+    }));
+
+    const items = await Promise.all(list.map((e) => fetchArticle(e)));
+
+    return {
+        title: `搜狐号 - ${globalConst.title}`,
+        link: originalRequest.url,
+        item: items,
+    };
+}
+
+/**
+ * Builds the feed from the author-articles API, addressed by the numeric 搜狐号 ID.
+ */
+async function legacyIdHandler(ctx) {
+    const id = ctx.req.param('xpt');
     const authorArticleAPI = `https://v2.sohu.com/author-page-api/author-articles/pc/${id}`;
 
-    const response = await got(authorArticleAPI);
-    const list = response.data.data.pcArticleVOS.map((item) => ({
+    const response = await ofetch(authorArticleAPI);
+    const list = response.data.pcArticleVOS.map((item) => ({
         title: item.title,
         link: item.link.startsWith('http') ? item.link : `https://${item.link}`,
         pubDate: parseDate(item.publicTime),
     }));
-    let author, link;
-
-    const items = await Promise.all(
-        list.map((e) =>
-            cache.tryGet(e.link, async () => {
-                const { data: response } = await got(e.link);
-                const $ = load(response);
-
-                if (!author) {
-                    const meta = $('span[data-role="original-link"]');
-                    author = meta.find('a').text();
-                    // can't get author's link on server, so use the RSSHub link
-                    // link = meta.attr('href').split('==')[0];
-                }
-
-                if (/window\.sohu_mp\.article_video/.test($('script').text())) {
-                    const videoSrc = $('script')
-                        .text()
-                        .match(/\s*url: "(.*?)",/)?.[1];
-                    e.description = art(path.join(__dirname, 'templates/video.art'), {
-                        poster: $('script')
-                            .text()
-                            .match(/cover: "(.*?)",/)?.[1],
-                        src: videoSrc,
-                        type: videoSrc?.split('.').pop().toLowerCase(),
-                    });
-                } else {
-                    const article = $('#mp-editor');
-
-                    article.find('#backsohucom, p[data-role="editor-name"]').each((i, e) => {
-                        $(e).remove();
-                    });
-
-                    e.description = article.html();
-                }
-
-                e.author = author;
-
-                return e;
-            })
-        )
-    );
+
+    const items = await Promise.all(list.map((e) => fetchArticle(e)));
 
     return {
-        title: `搜狐号 - ${author}`,
-        link,
+        title: `搜狐号 - ${id}`,
         item: items,
     };
 }