Skip to content

Commit

Permalink
feat: zhaishuyuan js decode (DIYgod#6924)
Browse files Browse the repository at this point in the history
  • Loading branch information
viing937 authored Feb 15, 2021
1 parent 9cd6cda commit 06184a2
Showing 1 changed file with 55 additions and 8 deletions.
63 changes: 55 additions & 8 deletions lib/routes/novel/zhaishuyuan.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,35 @@ const got = require('@/utils/got');
const cheerio = require('cheerio');
const iconv = require('iconv-lite');

// Lookup table from an obfuscated `$`/`_` token to the single character it stands for.
// Three- and four-character tokens spell a hex digit in binary (`$` = 1, `_` = 0),
// e.g. `$_$` -> 5 and `$$_$` -> d; the lone `_` token stands for the letter `u`.
const encodeMap = {
    $$$: '7',
    $$$$: 'f',
    $$$_: 'e',
    $$_: '6',
    $$_$: 'd',
    $$__: 'c',
    $_$: '5',
    $_$$: 'b',
    $_$_: 'a',
    $__: '4',
    $__$: '9',
    $___: '8',
    _: 'u',
    _$$: '3',
    _$_: '2',
    __$: '1',
    ___: '0',
};
// One token of the obfuscated script: either an `<uppercase letter>.<run of $ and _>`
// property reference (group 1 is the `$`/`_` run) or a run of backslashes with an
// optional leading `/` (group 2).
const tokenPattern = /[A-Z]\.([$_]+)|(\/?\\+)/g;
// One still-escaped paragraph: `\74\160\76` and `\74/\160\76` are the octal escapes
// for `<p>` and `</p>`; the body may only contain hex digits, `u`, `/` and backslashes.
const contentPattern = /\\74\\160\\76[0-9a-fu/\\]+?\\74\/\\160\\76/g;
// `\uXXXX` escape; group 1 is exactly four hex digits, so a malformed sequence is left
// untouched instead of being turned into a NUL character by a failed parseInt.
const regex16 = /\\u([0-9a-f]{4})/gi;
// Octal escape with the same extent as a JavaScript legacy octal escape: digits 0-7 only,
// at most three of them, and three only when the first is 0-3 (max `\377`). A digit that
// follows a complete escape therefore stays a literal digit.
const regex8 = /\\([0-3][0-7]{2}|[0-7]{1,2})/g;

module.exports = async (ctx) => {
const id = ctx.params.id;
const url = 'https://www.zhaishuyuan.com';
const link = `${url}/book/${id}`;
const extendedGot = got.extend({ headers: { Referer: url }, responseType: 'buffer' });
let extendedGot = got.extend({ headers: { Referer: url }, responseType: 'buffer' });
const response = await extendedGot.get(link);
const html = iconv.decode(response.data, 'gb2312');
const $ = cheerio.load(html);
Expand All @@ -17,19 +41,42 @@ module.exports = async (ctx) => {
.find('a')
.map((_, { attribs: { title, href } }) => ({ title, link: `${url}${href}` }))
.get();
extendedGot = got.extend({ headers: { Referer: link }, responseType: 'buffer' });
const item = await Promise.all(
list.map(
async ({ title, link }) =>
await ctx.cache.tryGet(link, async () => {
const browser = await require('@/utils/puppeteer')();
const page = await browser.newPage();
await page.goto(link);
const encoded = await page.$('#content > div > a');
await encoded.evaluate((encoded) => encoded.click());
const html = await page.evaluate(() => document.documentElement.innerHTML);
const response = await extendedGot.get(link);
const html = iconv.decode(response.data, 'gb2312');
const $ = cheerio.load(html);
const content = $('#content');
content.find('div').remove();

const encoded = $('#content > div');
if (encoded) {
// 提取编码后的正文内容进行复原
const rawText = html;
let rawContent = "";
for (const match of rawText.matchAll(tokenPattern)) {
if (match[1] in encodeMap) {
rawContent += encodeMap[match[1]];
}
if (match[2]) {
rawContent += match[2].replace("\\\\\\\\", "\\").replace("\\\\", "\\");
}
}
const decodedContentArr = [];
for (const contentPar of rawContent.matchAll(contentPattern)) {
const decodedStr = contentPar[0].replace(regex16, function (match, grp) {
return String.fromCharCode(parseInt(grp, 16));
}).replace(regex8, function (match, grp) {
return String.fromCharCode(parseInt(grp, 8));
});
decodedContentArr.push(decodedStr);
}
encoded.before(decodedContentArr);
encoded.remove();
}

const description = content.html();
const spanList = $('.title > span');
const author = spanList.eq(0).find('a').text();
Expand Down

0 comments on commit 06184a2

Please sign in to comment.