Skip to content

Commit

Permalink
feat(route): use API for reuters. (DIYgod#13190)
Browse files (browse the repository at this point in the history)
* feat(route): use reuters api.

* Fix typo in doc.

* Refining

* Update common.js

* Update common.js

* Update common.js

* Update common.js

* Update common.js

* Refactor the code, and apologize for my stubbornness.
  • Loading branch information
dzx-dzx authored Sep 5, 2023
1 parent 54f7b45 commit 31b5618
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 29 deletions.
75 changes: 49 additions & 26 deletions lib/v2/reuters/common.js
Original file line number | Diff line number | Diff line change
Expand Up @@ -9,31 +9,54 @@ module.exports = async (ctx) => {
const topic = ctx.params.topic ?? (category === 'authors' ? 'reuters' : '');
const limit = ctx.query.limit ? parseInt(ctx.query.limit) : 20;

const rootUrl = 'https://www.reuters.com';
const currentUrl = topic ? `${rootUrl}/${category}/${topic}/` : `${rootUrl}/${category}/`;
const response = await got(currentUrl);
const $ = cheerio.load(response.data);

let items = $('.media-story-card__body__3tRWy a.media-story-card__heading__eqhp9, a.svelte-pxbp38, a.svelte-11dknnx, a.svelte-e21rsn')
.slice(0, limit)
.toArray()
.map((item) => {
item = $(item);
item.find('span.visually-hidden__hidden__2qXMW').remove();
const MUST_FETCH_BY_TOPICS = ['authors'];
const section_id = `/${category}/${topic ? `${topic}/` : ''}`;
const { title, description, rootUrl, response } = await (async () => {
if (!MUST_FETCH_BY_TOPICS.includes(category)) {
const rootUrl = 'https://www.reuters.com/pf/api/v3/content/fetch/articles-by-section-alias-or-id-v1';
const response = await got(rootUrl, {
searchParams: {
query: JSON.stringify({
offset: 0,
size: limit,
section_id,
website: 'reuters',
}),
},
}).json();
return {
title: item.text(),
link: new URL(item.prop('href'), rootUrl).href,
title: response.result.section.title,
description: response.result.section.section_about,
rootUrl,
response,
};
});
if (!items.length) {
const metadata = $('script#fusion-metadata').html();
const metadataObj = JSON.parse(metadata.match(/Fusion.globalContent=(\{[\s\S]*?});/)[1]);
const articles = metadataObj.arcResult?.articles ?? metadataObj.result?.articles ?? [];
items = articles.map((article) => ({
title: article.title,
link: rootUrl + article.canonical_url,
}));
}
} else {
const rootUrl = 'https://www.reuters.com/pf/api/v3/content/fetch/articles-by-topic-v1';
const response = await got(rootUrl, {
searchParams: {
query: JSON.stringify({
offset: 0,
size: limit,
topic_url: section_id,
website: 'reuters',
}),
},
}).json();

return {
title: `${response.result.topics[0].name} | Reuters`,
description: response.result.topics[0].entity_id,
rootUrl,
response,
};
}
})();

let items = response.result.articles.map((e) => ({
title: e.title,
link: new URL(e.canonical_url, rootUrl).href,
}));

items = await Promise.all(
items.map((item) =>
ctx.cache.tryGet(item.link, async () => {
Expand Down Expand Up @@ -88,10 +111,10 @@ module.exports = async (ctx) => {
);

ctx.state.data = {
title: $('head title').text(),
description: $('head meta[name=description]').attr('content'),
title,
description,
image: 'https://www.reuters.com/pf/resources/images/reuters/logo-vertical-default-512x512.png?d=116',
link: currentUrl,
link: `https://www.reuters.com${section_id}`,
item: items,
};
};
6 changes: 3 additions & 3 deletions website/docs/routes/traditional-media.md
Original file line number | Diff line number | Diff line change
Expand Up @@ -544,13 +544,13 @@ Parameters can be obtained from the official website, for instance:

| All | Aerospace & Defense | Autos & Transportation | Energy | Environment | Finance | Healthcare & Pharmaceuticals | Media & Telecom | Retail & Consumer | Sustainable Business | Charged | Future of Health | Future of Money | Take Five | Reuters Impact |
| --- | ------------------- | ---------------------- | ------ | ----------- | ------- | ---------------------------- | --------------- | ----------------- | -------------------- | ------- | ---------------- | --------------- | --------- | -------------- |
| | aerospace-defense | autos-transportation | energy | environment | finance | healthcare-pharmaceuticals | media-telecom | retail-consumer | sustainable-business | charged | future-of-health | futrue-of-money | take-five | reuters-impact |
| | aerospace-defense | autos-transportation | energy | environment | finance | healthcare-pharmaceuticals | media-telecom | retail-consumer | sustainable-business | charged | future-of-health | future-of-money | take-five | reuters-impact |

- `legal/:topic`:

| All | Goverment | Legal Industry | Litigation | Transaction |
| All | Government | Legal Industry | Litigation | Transactional |
| --- | --------- | -------------- | ---------- | ----------- |
| | goverment | legalindustry | litigation | transaction |
| | government | legalindustry | litigation | transactional |

- `authors/:topic`:

Expand Down

0 comments on commit 31b5618

Please sign in to comment.