Skip to content

Commit

Permalink
refactor: optimize parameter (DIYgod#3200)
Browse files Browse the repository at this point in the history
  • Loading branch information
kt286 authored and DIYgod committed Oct 8, 2019
1 parent a2600c4 commit 90a87b1
Show file tree
Hide file tree
Showing 2 changed files with 138 additions and 130 deletions.
267 changes: 137 additions & 130 deletions lib/middleware/parameter.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,158 +19,165 @@ module.exports = async (ctx, next) => {
throw Error('this route is empty, please check the original site or <a href="https://github.com/DIYgod/RSSHub/issues/new/choose">create an issue</a>');
}

if (ctx.query && ctx.query.mode && ctx.query.mode.toLowerCase() === 'fulltext') {
const tasks = ctx.state.data.item.map(async (item) => {
const { link, author, description } = item;
const parsed_result = await ctx.cache.tryGet(`mercury-cache-${link}`, async () => {
// if the parser fails, keep the default description and do not report an error
try {
const res = await got(link);
const $ = cheerio.load(res.data);
const result = await mercury_parser.parse(link, {
html: $.html(),
});
return result;
} catch (e) {
// no-empty
}
});
// fix allowEmpty
ctx.state.data.item = ctx.state.data.item || [];

item.author = author || (parsed_result ? parsed_result.author : '');
item.description = parsed_result ? parsed_result.content : description;
});
await Promise.all(tasks);
}
// decode HTML entities
ctx.state.data.title && (ctx.state.data.title = he.decode(ctx.state.data.title + ''));
ctx.state.data.description && (ctx.state.data.description = he.decode(ctx.state.data.description + ''));

// handle description
if (ctx.state.data.item && ctx.state.data.item.length) {
ctx.state.data.item.forEach((item) => {
if (item.description) {
const $ = cheerio.load(item.description);
let baseUrl = item.link || ctx.state.data.link;

if (baseUrl && !baseUrl.match(/^https?:\/\//)) {
if (baseUrl.match(/^\/\//)) {
baseUrl = 'http:' + baseUrl;
} else {
baseUrl = 'http://' + baseUrl;
}
ctx.state.data.item.forEach((item) => {
item.title && (item.title = he.decode(item.title + ''));

if (item.description) {
const $ = cheerio.load(item.description);
let baseUrl = item.link || ctx.state.data.link;

if (baseUrl && !baseUrl.match(/^https?:\/\//)) {
if (baseUrl.match(/^\/\//)) {
baseUrl = 'http:' + baseUrl;
} else {
baseUrl = 'http://' + baseUrl;
}
}

$('script').remove();
$('script').remove();

$('a').each((_, ele) => {
const $ele = $(ele);
$('a').each((_, ele) => {
const $ele = $(ele);

// absolute link
if (baseUrl) {
try {
$ele.attr('href', new URL($ele.attr('href'), baseUrl).href);
} catch (e) {
// no-empty
}
// absolute link
if (baseUrl) {
try {
$ele.attr('href', new URL($ele.attr('href'), baseUrl).href);
} catch (e) {
// no-empty
}
});
}
});

$('img').each((_, ele) => {
const $ele = $(ele);

$('img').each((_, ele) => {
const $ele = $(ele);

// fix lazyload
if (!$ele.attr('src')) {
for (const key in ele.attribs) {
const value = ele.attribs[key].trim();
if (['.gif', '.png', '.jpg', '.webp'].some((suffix) => value.includes(suffix))) {
$ele.attr('src', value);
break;
}
// fix lazyload
if (!$ele.attr('src')) {
for (const key in ele.attribs) {
const value = ele.attribs[key].trim();
if (['.gif', '.png', '.jpg', '.webp'].some((suffix) => value.includes(suffix))) {
$ele.attr('src', value);
break;
}
}
}

// absolute link
if (baseUrl) {
try {
$ele.attr('src', new URL($ele.attr('src'), baseUrl).href);
} catch (e) {
// no-empty
}
// absolute link
if (baseUrl) {
try {
$ele.attr('src', new URL($ele.attr('src'), baseUrl).href);
} catch (e) {
// no-empty
}
}

// referrerpolicy
$ele.attr('referrerpolicy', 'no-referrer');
// referrerpolicy
$ele.attr('referrerpolicy', 'no-referrer');

// redundant attributes
['onclick', 'onerror', 'onload'].forEach((e) => {
$ele.removeAttr(e);
});
// redundant attributes
['onclick', 'onerror', 'onload'].forEach((e) => {
$ele.removeAttr(e);
});
item.description = $('body').html();
}
});
}
});
item.description = he.decode($('body').html() + '');
}
});

// decode HTML entities
ctx.state.data.title && (ctx.state.data.title = he.decode(ctx.state.data.title + ''));
ctx.state.data.description && (ctx.state.data.description = he.decode(ctx.state.data.description + ''));
ctx.state.data.item &&
ctx.state.data.item.forEach((item) => {
item.title && (item.title = he.decode(item.title + ''));
item.description && (item.description = he.decode(item.description + ''));
});

// filter
if (ctx.query && (ctx.query.filter || ctx.query.filter_title || ctx.query.filter_description || ctx.query.filter_author)) {
ctx.state.data.item = ctx.state.data.item.filter((item) => {
const title = item.title || '';
const description = item.description || title;
const author = item.author || '';
return !(
(ctx.query.filter && !title.match(ctx.query.filter) && !description.match(ctx.query.filter)) ||
(ctx.query.filter_title && !title.match(ctx.query.filter_title)) ||
(ctx.query.filter_description && !description.match(ctx.query.filter_description)) ||
(ctx.query.filter_author && !author.match(ctx.query.filter_author))
);
});
}
if (ctx.query && (ctx.query.filterout || ctx.query.filterout_title || ctx.query.filterout_description || ctx.query.filterout_author)) {
ctx.state.data.item = ctx.state.data.item.filter((item) => {
const title = item.title;
const description = item.description || title;
const author = item.author || '';
return (
(ctx.query.filterout && !title.match(ctx.query.filterout) && !description.match(ctx.query.filterout)) ||
(ctx.query.filterout_title && !title.match(ctx.query.filterout_title)) ||
(ctx.query.filterout_description && !description.match(ctx.query.filterout_description)) ||
(ctx.query.filterout_author && !author.match(ctx.query.filterout_author))
);
});
}
if (ctx.query && ctx.query.filter_time) {
const now = Date.now();
ctx.state.data.item = ctx.state.data.item.filter(({ pubDate }) => {
if (!pubDate) {
return true;
if (ctx.query) {
// limit
if (ctx.query.limit) {
ctx.state.data.item = ctx.state.data.item.slice(0, parseInt(ctx.query.limit));
}

// filter
if (ctx.query.filter || ctx.query.filter_title || ctx.query.filter_description || ctx.query.filter_author) {
if (ctx.query.filter) {
ctx.query.filter_title = ctx.query.filter;
ctx.query.filter_description = ctx.query.filter;
}
ctx.state.data.item = ctx.state.data.item.filter((item) => {
const title = item.title || '';
const description = item.description || title;
const author = item.author || '';
let isFilter = true;
ctx.query.filter_title && (isFilter = isFilter && !title.match(ctx.query.filter_title));
ctx.query.filter_description && (isFilter = isFilter && !description.match(ctx.query.filter_description));
ctx.query.filter_author && (isFilter = isFilter && !author.match(ctx.query.filter_author));
return !isFilter;
});
}

try {
return now - new Date(pubDate).getTime() <= parseInt(ctx.query.filter_time) * 1000;
} catch (err) {
return true;
if (ctx.query.filterout || ctx.query.filterout_title || ctx.query.filterout_description || ctx.query.filterout_author) {
if (ctx.query.filterout) {
ctx.query.filterout_title = ctx.query.filterout;
ctx.query.filterout_description = ctx.query.filterout;
}
});
}
ctx.state.data.item = ctx.state.data.item.filter((item) => {
const title = item.title;
const description = item.description || title;
const author = item.author || '';
let isFilter = true;
ctx.query.filterout_title && (isFilter = isFilter && !title.match(ctx.query.filterout_title));
ctx.query.filterout_description && (isFilter = isFilter && !description.match(ctx.query.filterout_description));
ctx.query.filterout_author && (isFilter = isFilter && !author.match(ctx.query.filterout_author));
return isFilter;
});
}

// limit
if (ctx.query && ctx.query.limit) {
ctx.state.data.item = ctx.state.data.item.slice(0, parseInt(ctx.query.limit));
}
if (ctx.query.filter_time) {
const now = Date.now();
ctx.state.data.item = ctx.state.data.item.filter(({ pubDate }) => {
let isFilter = true;
try {
isFilter = !pubDate || now - new Date(pubDate).getTime() <= parseInt(ctx.query.filter_time) * 1000;
} catch (err) {
// no-empty
}
return isFilter;
});
}

// telegram instant view
if (ctx.query.tgiv) {
ctx.state.data.item.map((item) => {
const encodedlink = encodeURIComponent(item.link);
item.link = `https://t.me/iv?url=${encodedlink}&rhash=${ctx.query.tgiv}`;
return item;
});
}

// fulltext
if (ctx.query.mode && ctx.query.mode.toLowerCase() === 'fulltext') {
const tasks = ctx.state.data.item.map(async (item) => {
const { link, author, description } = item;
const parsed_result = await ctx.cache.tryGet(`mercury-cache-${link}`, async () => {
// if the parser fails, keep the default description and do not report an error
try {
const res = await got(link);
const $ = cheerio.load(res.data);
const result = await mercury_parser.parse(link, {
html: $.html(),
});
return result;
} catch (e) {
// no-empty
}
});

// telegram instant view
if (ctx.query && ctx.query.tgiv) {
ctx.state.data.item.map((item) => {
const encodedlink = encodeURIComponent(item.link);
item.link = `https://t.me/iv?url=${encodedlink}&rhash=${ctx.query.tgiv}`;
return item;
});
item.author = author || (parsed_result ? parsed_result.author : '');
item.description = parsed_result ? parsed_result.content : description;
});
await Promise.all(tasks);
}
}
}
};
1 change: 1 addition & 0 deletions test/utils/got.js
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ describe('got', () => {
params: {
test: 1,
},
responseType: 'buffer',
});
});
});

0 comments on commit 90a87b1

Please sign in to comment.