Skip to content

Commit

Permalink
feat: Add full article for AP News (DIYgod#4103)
Browse files Browse the repository at this point in the history
  • Loading branch information
zoenglinghou authored Feb 29, 2020
1 parent 2f001cd commit 859ba6f
Showing 1 changed file with 52 additions and 27 deletions.
79 changes: 52 additions & 27 deletions lib/routes/apnews/topics.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
const got = require('@/utils/got');
const cheerio = require('cheerio');
const url = require('url');

module.exports = async (ctx) => {
const topic = ctx.params.topic;
Expand All @@ -12,35 +13,59 @@ module.exports = async (ctx) => {
const data = response.data;

const $ = cheerio.load(data);
const list = $('div.FeedCard');
// Keep only the feed cards that expose a headline link: a card counts when
// its headline anchor has an `href` attribute.
const list = $('div.FeedCard')
    .toArray()
    .filter((card) => {
        const headlineHref = $(card)
            .find('a[class^=Component-headline]')
            .attr('href');
        return headlineHref !== undefined;
    });

// Build one feed item per card. Every article page is fetched so that the
// item carries the full article body rather than only the card summary.
// Promise.all is fail-fast: a single rejected article rejects the whole feed,
// which is why the metadata handling below tolerates missing fields.
const out = await Promise.all(
    list.map(async (article) => {
        // Card hrefs are resolved against the site root to get an absolute link.
        const link = url.resolve(
            'https://apnews.com',
            $(article)
                .find('a[class^=Component-headline]')
                .attr('href')
        );

        // NOTE(review): ctx.cache.tryGet is assumed to return the value cached
        // under `link`, or to run the callback and store its result; confirm
        // against the cache helper. The cached value stays a 4-element array
        // ([title, author, pubDate, description]) so the stored shape is unchanged.
        const [title, author, pubDate, description] = await ctx.cache.tryGet(link, async () => {
            const result = await got.get(link);

            // Named differently from the outer `$` so the listing page's
            // document is not shadowed inside this callback.
            const $article = cheerio.load(result.data);

            // Article metadata is read from the page's JSON-LD block.
            const head = JSON.parse($article('script[type="application/ld+json"]').html());

            // `author` may be a list, a single value, or absent, and entries
            // may be plain strings or objects with a `name`. Normalise all of
            // these to one ' & '-separated string instead of throwing on
            // anything that is not an array of strings.
            const authorName = []
                .concat(head.author || [])
                .map((entry) => (entry && typeof entry === 'object' ? entry.name : entry))
                .filter(Boolean)
                .join(' & ');

            // Only emit the lead image when the metadata provides one;
            // otherwise the markup would contain src="undefined".
            const image = head.image ? `<img src="${head.image}">` : '';

            // `.html()` yields null when the article container is missing;
            // fall back to an empty body instead of the literal text "null".
            const text = $article('div.Article').html() || '';

            return [head.headline, authorName, head.datePublished, image + text];
        });

        // An async callback already wraps its return value in a promise.
        return {
            title,
            description,
            pubDate,
            link,
            author,
        };
    })
);

ctx.state.data = {
title: $('title').text(),
title: 'AP News - ' + $('title').text(),
link: `https://www.apnews.com/${topic}`,
item:
list &&
list
.map((index, item) => {
item = $(item);

return {
title: item
.find('h1[class^=Component-h1]')
.first()
.text(),
author: item
.find('span[class^=Component-bylines]')
.first()
.text()
.replace('By ', ''),
description: item
.find('div.content')
.first()
.text(),
pubDate: item.find('span[class^="Timestamp Component-root"]').attr('data-source'),
link: item.find('a[class^=Component-headline]').attr('href'),
};
})
.get(),
item: out,
};
};

0 comments on commit 859ba6f

Please sign in to comment.