Skip to content

Commit

Permalink
v8.0.8
Browse files Browse the repository at this point in the history
- Merge pr extractus#374 by @andremacola (issue extractus#373)
- Update dependencies
- Update CI config
- Fix function call in eval.js
  • Loading branch information
ndaidong committed Dec 5, 2023
1 parent f84aec2 commit 0fd6c66
Show file tree
Hide file tree
Showing 5 changed files with 21 additions and 18 deletions.
11 changes: 4 additions & 7 deletions .github/workflows/ci-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,13 @@ jobs:

strategy:
matrix:
node_version: [18.x, 20.x]
node_version: [18.x, 20.x, 21.x]

steps:
- uses: actions/checkout@v3

- name: setup Node.js v${{ matrix.node_version }}
uses: actions/setup-node@v3
uses: actions/setup-node@v4
with:
node-version: ${{ matrix.node_version }}

Expand All @@ -31,8 +31,8 @@ jobs:
npm run build --if-present
npm run test
- name: Coveralls GitHub Action
uses: coverallsapp/github-action@1.1.3
- name: Report Coveralls
uses: coverallsapp/github-action@v2
with:
github-token: ${{ secrets.GITHUB_TOKEN }}

Expand All @@ -43,6 +43,3 @@ jobs:
key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }}
restore-keys: |
${{ runner.os }}-node-
2 changes: 1 addition & 1 deletion .github/workflows/codeql-analysis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ jobs:

steps:
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4

# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ The result - `article` - can be `null` or an object with the following structure
favicon: String,
content: String,
published: Date String,
type: String, // page type
source: String, // original publisher
links: Array, // list of alternative links
ttr: Number, // time to read in seconds, 0 = unknown
Expand Down
11 changes: 8 additions & 3 deletions eval.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import { readFileSync, writeFileSync, existsSync } from 'node:fs'
import { slugify } from 'bellajs'

import { isValid as isValidUrl } from './src/utils/linker.js'
import { extract } from './src/main.js'
import { extractFromHtml } from './src/main.js'

if (!existsSync('evaluation')) {
execSync('mkdir evaluation')
Expand All @@ -15,7 +15,12 @@ if (!existsSync('evaluation')) {
const extractFromUrl = async (url) => {
try {
console.time('extraction')
const art = await extract(url)
const res = await fetch(url)
const buffer = await res.arrayBuffer()
const decoder = new TextDecoder('iso-8859-1')
const html = decoder.decode(buffer)

const art = await extractFromHtml(html)
console.log(art)
const slug = slugify(art.title)
writeFileSync(`evaluation/${slug}.html`, art.content, 'utf8')
Expand All @@ -28,7 +33,7 @@ const extractFromUrl = async (url) => {
const extractFromFile = async (fpath) => {
try {
const html = readFileSync(fpath, 'utf8')
const art = await extract(html)
const art = await extractFromHtml(html)
console.log(art)
} catch (err) {
console.trace(err)
Expand Down
14 changes: 7 additions & 7 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"version": "8.0.3",
"version": "8.0.4",
"name": "@extractus/article-extractor",
"description": "To extract main article from given URL",
"homepage": "https://github.com/extractus/article-extractor",
Expand Down Expand Up @@ -33,15 +33,15 @@
"@mozilla/readability": "^0.4.4",
"bellajs": "^11.1.2",
"cross-fetch": "^4.0.0",
"linkedom": "^0.15.1",
"linkedom": "^0.16.4",
"sanitize-html": "2.11.0"
},
"devDependencies": {
"@types/sanitize-html": "^2.9.0",
"eslint": "^8.47.0",
"https-proxy-agent": "^7.0.1",
"jest": "^29.6.2",
"nock": "^13.3.2"
"@types/sanitize-html": "^2.9.5",
"eslint": "^8.55.0",
"https-proxy-agent": "^7.0.2",
"jest": "^29.7.0",
"nock": "^13.4.0"
},
"keywords": [
"article",
Expand Down

0 comments on commit 0fd6c66

Please sign in to comment.